xref: /netbsd-src/sys/rump/net/lib/libsockin/sockin.c (revision 6a493d6bc668897c91594964a732d38505b70cbb)
1 /*	$NetBSD: sockin.c,v 1.35 2013/08/29 17:49:21 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.35 2013/08/29 17:49:21 rmind Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45 
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 
54 #include <rump/rumpuser.h>
55 
56 #include "rump_private.h"
57 #include "rumpcomp_user.h"
58 
/*
 * An inet communication domain which uses the socket interface.
 * Supports UDP and TCP over both IPv4 and IPv6; further protocols
 * could be supported by adding more entries to the protosw tables.
 */
64 
/*
 * The two domains implemented by this file (PF_INET and PF_INET6).
 * NOTE(review): DOMAIN_DEFINE comes from outside this file; confirm
 * exactly what it emits for each domain.
 */
DOMAIN_DEFINE(sockindomain);
DOMAIN_DEFINE(sockin6domain);

/*
 * Forward declarations: these are referenced by the protosw and
 * domain tables, which appear before the function definitions.
 */
static int	sockin_do_init(void);
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *, int, struct mbuf *,
			      struct mbuf *, struct mbuf *, struct lwp *);
static int	sockin_ctloutput(int op, struct socket *, struct sockopt *);
73 
/*
 * Protocol switch for the PF_INET domain: one datagram (UDP) entry and
 * one stream (TCP) entry.  Both entries use the same pr_usrreq and
 * pr_ctloutput handlers.
 */
const struct protosw sockinsw[] = {
{
	/* UDP */
	.pr_type = SOCK_DGRAM,
	.pr_domain = &sockindomain,
	.pr_protocol = IPPROTO_UDP,
	.pr_flags = PR_ATOMIC|PR_ADDR,
	.pr_usrreq = sockin_usrreq,
	.pr_ctloutput = sockin_ctloutput,
},
{
	/* TCP */
	.pr_type = SOCK_STREAM,
	.pr_domain = &sockindomain,
	.pr_protocol = IPPROTO_TCP,
	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
	.pr_usrreq = sockin_usrreq,
	.pr_ctloutput = sockin_ctloutput,
}};
/*
 * Protocol switch for the PF_INET6 domain.  Same layout as the PF_INET
 * table (UDP entry followed by TCP entry, same handlers); only the
 * pr_domain pointer differs.
 */
const struct protosw sockin6sw[] = {
{
	/* UDP */
	.pr_type = SOCK_DGRAM,
	.pr_domain = &sockin6domain,
	.pr_protocol = IPPROTO_UDP,
	.pr_flags = PR_ATOMIC|PR_ADDR,
	.pr_usrreq = sockin_usrreq,
	.pr_ctloutput = sockin_ctloutput,
},
{
	/* TCP */
	.pr_type = SOCK_STREAM,
	.pr_domain = &sockin6domain,
	.pr_protocol = IPPROTO_TCP,
	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
	.pr_usrreq = sockin_usrreq,
	.pr_ctloutput = sockin_ctloutput,
}};
108 
/*
 * Domain descriptor for PF_INET.  dom_init points at sockin_init(),
 * which performs the one-time setup for this file; the protosw range
 * covers every entry of sockinsw.  All optional hooks are left NULL.
 */
struct domain sockindomain = {
	.dom_family = PF_INET,
	.dom_name = "socket_inet",
	.dom_init = sockin_init,
	.dom_externalize = NULL,
	.dom_dispose = NULL,
	.dom_protosw = sockinsw,
	/* one past the last protosw entry */
	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
	.dom_rtattach = rt_inithead,
	.dom_rtoffset = 32,
	.dom_maxrtkey = sizeof(struct sockaddr_in),
	.dom_ifattach = NULL,
	.dom_ifdetach = NULL,
	.dom_ifqueues = { NULL },
	.dom_link = { NULL },
	.dom_mowner = MOWNER_INIT("",""),
	.dom_rtcache = { NULL },
	.dom_sockaddr_cmp = NULL
};
/*
 * Domain descriptor for PF_INET6.  Mirrors the PF_INET descriptor but
 * uses the sockin6sw table and sizes the routing key for
 * struct sockaddr_in6.
 *
 * NOTE(review): dom_rtoffset is 32 here, the same value as in the
 * PF_INET descriptor -- confirm that this is intended for
 * sockaddr_in6 keys.
 */
struct domain sockin6domain = {
	.dom_family = PF_INET6,
	.dom_name = "socket_inet6",
	.dom_init = sockin_init,
	.dom_externalize = NULL,
	.dom_dispose = NULL,
	.dom_protosw = sockin6sw,
	/* one past the last protosw entry */
	.dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
	.dom_rtattach = rt_inithead,
	.dom_rtoffset = 32,
	.dom_maxrtkey = sizeof(struct sockaddr_in6),
	.dom_ifattach = NULL,
	.dom_ifdetach = NULL,
	.dom_ifqueues = { NULL },
	.dom_link = { NULL },
	.dom_mowner = MOWNER_INIT("",""),
	.dom_rtcache = { NULL },
	.dom_sockaddr_cmp = NULL
};
147 
/*
 * SO2S: read back the descriptor that registersock() stored in a
 * socket's so_internal field.  The result is an intptr_t and is what
 * gets handed to the rumpcomp_sockin_*() and rumpuser_close() calls.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "&sock" or "base + i") can be passed safely.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))
/* size used for soreserve() and for the SO_SNDBUF/SO_RCVBUF requests */
#define SOCKIN_SBSIZE 65536
150 
/* One registered socket; units are linked together on the su_ent list. */
struct sockin_unit {
	struct socket *su_so;		/* the socket this unit tracks */

	LIST_ENTRY(sockin_unit) su_entries;	/* linkage on su_ent */
};
/* list of all registered sockets */
static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
/* held while su_ent, nsock and rebuild are modified */
static kmutex_t su_mtx;
/* set when su_ent changes; makes sockinworker() rebuild its pollfd array */
static bool rebuild;
/* number of units currently on su_ent */
static int nsock;

/*
 * XXX: for the bpf hack
 * sockin_if is named "sockin0" and attached to bpf in sockin_do_init();
 * it is the interface passed to bpf_mtap_af() for sent and received data.
 */
static struct ifnet sockin_if;
/*
 * Stub implementation: both arguments are ignored and 0 is always
 * returned.
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{

	return 0;
}
164 
165 static int
166 registersock(struct socket *so, int news)
167 {
168 	struct sockin_unit *su;
169 
170 	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
171 	if (!su)
172 		return ENOMEM;
173 
174 	so->so_internal = (void *)(intptr_t)news;
175 	su->su_so = so;
176 
177 	mutex_enter(&su_mtx);
178 	LIST_INSERT_HEAD(&su_ent, su, su_entries);
179 	nsock++;
180 	rebuild = true;
181 	mutex_exit(&su_mtx);
182 
183 	return 0;
184 }
185 
186 static void
187 removesock(struct socket *so)
188 {
189 	struct sockin_unit *su_iter;
190 
191 	mutex_enter(&su_mtx);
192 	LIST_FOREACH(su_iter, &su_ent, su_entries) {
193 		if (su_iter->su_so == so)
194 			break;
195 	}
196 	if (!su_iter)
197 		panic("no such socket");
198 
199 	LIST_REMOVE(su_iter, su_entries);
200 	nsock--;
201 	rebuild = true;
202 	mutex_exit(&su_mtx);
203 
204 	rumpuser_close(SO2S(su_iter->su_so));
205 	kmem_free(su_iter, sizeof(*su_iter));
206 }
207 
208 static void
209 sockin_process(struct socket *so)
210 {
211 	struct sockaddr_in6 from;
212 	struct iovec io;
213 	struct msghdr rmsg;
214 	struct mbuf *m;
215 	size_t n, plen;
216 	int error;
217 
218 	m = m_gethdr(M_WAIT, MT_DATA);
219 	if (so->so_proto->pr_type == SOCK_DGRAM) {
220 		plen = IP_MAXPACKET;
221 		MEXTMALLOC(m, plen, M_DONTWAIT);
222 	} else {
223 		plen = MCLBYTES;
224 		MCLGET(m, M_DONTWAIT);
225 	}
226 	if ((m->m_flags & M_EXT) == 0) {
227 		m_freem(m);
228 		return;
229 	}
230 
231 	memset(&rmsg, 0, sizeof(rmsg));
232 	io.iov_base = mtod(m, void *);
233 	io.iov_len = plen;
234 	rmsg.msg_iov = &io;
235 	rmsg.msg_iovlen = 1;
236 	rmsg.msg_name = (struct sockaddr *)&from;
237 	rmsg.msg_namelen = sizeof(from);
238 
239 	error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
240 	if (error || n == 0) {
241 		m_freem(m);
242 
243 		/* Treat a TCP socket a goner */
244 		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
245 			mutex_enter(softnet_lock);
246 			soisdisconnected(so);
247 			mutex_exit(softnet_lock);
248 			removesock(so);
249 		}
250 		return;
251 	}
252 	m->m_len = m->m_pkthdr.len = n;
253 
254 	bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
255 
256 	mutex_enter(softnet_lock);
257 	if (so->so_proto->pr_type == SOCK_DGRAM) {
258 		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
259 			m_freem(m);
260 		}
261 	} else {
262 		sbappendstream(&so->so_rcv, m);
263 	}
264 
265 	sorwakeup(so);
266 	mutex_exit(softnet_lock);
267 }
268 
269 static void
270 sockin_accept(struct socket *so)
271 {
272 	struct socket *nso;
273 	struct sockaddr_in6 sin;
274 	int news, error, slen;
275 
276 	slen = sizeof(sin);
277 	error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
278 	    &slen, &news);
279 	if (error)
280 		return;
281 
282 	mutex_enter(softnet_lock);
283 	nso = sonewconn(so, true);
284 	if (nso == NULL)
285 		goto errout;
286 	if (registersock(nso, news) != 0)
287 		goto errout;
288 	mutex_exit(softnet_lock);
289 	return;
290 
291  errout:
292 	rumpuser_close(news);
293 	if (nso)
294 		soclose(nso);
295 	mutex_exit(softnet_lock);
296 }
297 
298 #define POLLTIMEOUT 100	/* check for new entries every 100ms */
299 
300 /* XXX: doesn't handle socket (kernel) locking properly? */
301 static void
302 sockinworker(void *arg)
303 {
304 	struct pollfd *pfds = NULL, *npfds;
305 	struct sockin_unit *su_iter;
306 	struct socket *so;
307 	int cursock = 0, i, rv, error;
308 
309 	/*
310 	 * Loop reading requests.  Check for new sockets periodically
311 	 * (could be smarter, but I'm lazy).
312 	 */
313 	for (;;) {
314 		if (rebuild) {
315 			npfds = NULL;
316 			mutex_enter(&su_mtx);
317 			if (nsock)
318 				npfds = kmem_alloc(nsock * sizeof(*npfds),
319 				    KM_NOSLEEP);
320 			if (npfds || nsock == 0) {
321 				if (pfds)
322 					kmem_free(pfds, cursock*sizeof(*pfds));
323 				pfds = npfds;
324 				cursock = nsock;
325 				rebuild = false;
326 
327 				i = 0;
328 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
329 					pfds[i].fd = SO2S(su_iter->su_so);
330 					pfds[i].events = POLLIN;
331 					pfds[i].revents = 0;
332 					i++;
333 				}
334 				KASSERT(i == nsock);
335 			}
336 			mutex_exit(&su_mtx);
337 		}
338 
339 		/* find affected sockets & process */
340 		error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
341 		for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
342 			if (pfds[i].revents & POLLIN) {
343 				mutex_enter(&su_mtx);
344 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
345 					if (SO2S(su_iter->su_so)==pfds[i].fd) {
346 						so = su_iter->su_so;
347 						mutex_exit(&su_mtx);
348 						if(so->so_options&SO_ACCEPTCONN)
349 							sockin_accept(so);
350 						else
351 							sockin_process(so);
352 						mutex_enter(&su_mtx);
353 						break;
354 					}
355 				}
356 				/* if we can't find it, just wing it */
357 				KASSERT(rebuild || su_iter);
358 				mutex_exit(&su_mtx);
359 				pfds[i].revents = 0;
360 				rv--;
361 				i = -1;
362 				continue;
363 			}
364 
365 			/* something else?  ignore */
366 			if (pfds[i].revents) {
367 				pfds[i].revents = 0;
368 				rv--;
369 			}
370 		}
371 		KASSERT(rv <= 0);
372 	}
373 
374 }
375 
376 static int
377 sockin_do_init(void)
378 {
379 	int rv;
380 
381 	if (rump_threads) {
382 		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
383 		    NULL, NULL, "sockwork")) != 0)
384 			panic("sockin_init: could not create worker thread\n");
385 	} else {
386 		printf("sockin_init: no threads => no worker thread\n");
387 	}
388 	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
389 	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
390 	bpf_attach(&sockin_if, DLT_NULL, 0);
391 	return 0;
392 }
393 
394 static void
395 sockin_init(void)
396 {
397 	static ONCE_DECL(init);
398 
399 	RUN_ONCE(&init, sockin_do_init);
400 }
401 
402 static int
403 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
404 	struct mbuf *control, struct lwp *l)
405 {
406 	int error = 0;
407 
408 	switch (req) {
409 	case PRU_ATTACH:
410 	{
411 		int news;
412 		int sbsize;
413 		int family;
414 
415 		sosetlock(so);
416 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
417 			error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
418 			if (error)
419 				break;
420 		}
421 
422 		family = so->so_proto->pr_domain->dom_family;
423 		KASSERT(family == PF_INET || family == PF_INET6);
424 		error = rumpcomp_sockin_socket(family,
425 		    so->so_proto->pr_type, 0, &news);
426 		if (error)
427 			break;
428 
429 		/* for UDP sockets, make sure we can send&recv max */
430 		if (so->so_proto->pr_type == SOCK_DGRAM) {
431 			sbsize = SOCKIN_SBSIZE;
432 			error = rumpcomp_sockin_setsockopt(news,
433 			    SOL_SOCKET, SO_SNDBUF,
434 			    &sbsize, sizeof(sbsize));
435 			sbsize = SOCKIN_SBSIZE;
436 			error = rumpcomp_sockin_setsockopt(news,
437 			    SOL_SOCKET, SO_RCVBUF,
438 			    &sbsize, sizeof(sbsize));
439 		}
440 
441 		if ((error = registersock(so, news)) != 0)
442 			rumpuser_close(news);
443 
444 		break;
445 	}
446 
447 	case PRU_ACCEPT:
448 		/* we do all the work in the worker thread */
449 		break;
450 
451 	case PRU_BIND:
452 		error = rumpcomp_sockin_bind(SO2S(so),
453 		    mtod(nam, const struct sockaddr *),
454 		    nam->m_len);
455 		break;
456 
457 	case PRU_CONNECT:
458 		error = rumpcomp_sockin_connect(SO2S(so),
459 		    mtod(nam, struct sockaddr *), nam->m_len);
460 		if (error == 0)
461 			soisconnected(so);
462 		break;
463 
464 	case PRU_LISTEN:
465 		error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
466 		break;
467 
468 	case PRU_SEND:
469 	{
470 		struct sockaddr *saddr;
471 		struct msghdr mhdr;
472 		size_t iov_max, i;
473 		struct iovec iov_buf[32], *iov;
474 		struct mbuf *m2;
475 		size_t tot, n;
476 		int s;
477 
478 		bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
479 
480 		memset(&mhdr, 0, sizeof(mhdr));
481 
482 		iov_max = 0;
483 		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
484 			iov_max++;
485 		}
486 
487 		if (iov_max <= __arraycount(iov_buf)) {
488 			iov = iov_buf;
489 		} else {
490 			iov = kmem_alloc(sizeof(struct iovec) * iov_max,
491 			    KM_SLEEP);
492 		}
493 
494 		tot = 0;
495 		for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
496 			iov[i].iov_base = m2->m_data;
497 			iov[i].iov_len = m2->m_len;
498 			tot += m2->m_len;
499 		}
500 		mhdr.msg_iov = iov;
501 		mhdr.msg_iovlen = i;
502 		s = SO2S(so);
503 
504 		if (nam != NULL) {
505 			saddr = mtod(nam, struct sockaddr *);
506 			mhdr.msg_name = saddr;
507 			mhdr.msg_namelen = saddr->sa_len;
508 		}
509 
510 		rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
511 
512 		if (iov != iov_buf)
513 			kmem_free(iov, sizeof(struct iovec) * iov_max);
514 
515 		m_freem(m);
516 		m_freem(control);
517 
518 		/* this assumes too many things to list.. buthey, testing */
519 		if (!rump_threads)
520 			sockin_process(so);
521 	}
522 		break;
523 
524 	case PRU_SHUTDOWN:
525 		removesock(so);
526 		break;
527 
528 	case PRU_SOCKADDR:
529 	case PRU_PEERADDR:
530 	{
531 		int slen = nam->m_len;
532 		enum rumpcomp_sockin_getnametype which;
533 
534 		if (req == PRU_SOCKADDR)
535 			which = RUMPCOMP_SOCKIN_SOCKNAME;
536 		else
537 			which = RUMPCOMP_SOCKIN_PEERNAME;
538 		error = rumpcomp_sockin_getname(SO2S(so),
539 		    mtod(nam, struct sockaddr *), &slen, which);
540 		if (error == 0)
541 			nam->m_len = slen;
542 		break;
543 	}
544 
545 	case PRU_CONTROL:
546 		error = ENOTTY;
547 		break;
548 
549 	default:
550 		panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
551 	}
552 
553 	return error;
554 }
555 
556 static int
557 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
558 {
559 
560 	return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
561 	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
562 }
563