xref: /netbsd-src/sys/rump/net/lib/libsockin/sockin.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*	$NetBSD: sockin.c,v 1.42 2014/07/01 05:49:19 rtr Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.42 2014/07/01 05:49:19 rtr Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/once.h>
39 #include <sys/poll.h>
40 #include <sys/protosw.h>
41 #include <sys/queue.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/time.h>
45 
46 #include <net/bpf.h>
47 #include <net/if.h>
48 #include <net/radix.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 
54 #include <rump/rumpuser.h>
55 
56 #include "rump_private.h"
57 #include "sockin_user.h"
58 
59 /*
60  * An inet communication domain which uses the socket interface.
61  * Supports IPv4 & IPv6 UDP/TCP.
62  */
63 
64 DOMAIN_DEFINE(sockindomain);
65 DOMAIN_DEFINE(sockin6domain);
66 
67 static int	sockin_do_init(void);
68 static void	sockin_init(void);
69 static int	sockin_attach(struct socket *, int);
70 static void	sockin_detach(struct socket *);
71 static int	sockin_ioctl(struct socket *, u_long, void *, struct ifnet *);
72 static int	sockin_usrreq(struct socket *, int, struct mbuf *,
73 			      struct mbuf *, struct mbuf *, struct lwp *);
74 static int	sockin_ctloutput(int op, struct socket *, struct sockopt *);
75 
76 static const struct pr_usrreqs sockin_usrreqs = {
77 	.pr_attach = sockin_attach,
78 	.pr_detach = sockin_detach,
79 	.pr_ioctl = sockin_ioctl,
80 	.pr_generic = sockin_usrreq,
81 };
82 
83 const struct protosw sockinsw[] = {
84 {
85 	.pr_type = SOCK_DGRAM,
86 	.pr_domain = &sockindomain,
87 	.pr_protocol = IPPROTO_UDP,
88 	.pr_flags = PR_ATOMIC|PR_ADDR,
89 	.pr_usrreqs = &sockin_usrreqs,
90 	.pr_ctloutput = sockin_ctloutput,
91 },
92 {
93 	.pr_type = SOCK_STREAM,
94 	.pr_domain = &sockindomain,
95 	.pr_protocol = IPPROTO_TCP,
96 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
97 	.pr_usrreqs = &sockin_usrreqs,
98 	.pr_ctloutput = sockin_ctloutput,
99 }};
100 const struct protosw sockin6sw[] = {
101 {
102 	.pr_type = SOCK_DGRAM,
103 	.pr_domain = &sockin6domain,
104 	.pr_protocol = IPPROTO_UDP,
105 	.pr_flags = PR_ATOMIC|PR_ADDR,
106 	.pr_usrreqs = &sockin_usrreqs,
107 	.pr_ctloutput = sockin_ctloutput,
108 },
109 {
110 	.pr_type = SOCK_STREAM,
111 	.pr_domain = &sockin6domain,
112 	.pr_protocol = IPPROTO_TCP,
113 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
114 	.pr_usrreqs = &sockin_usrreqs,
115 	.pr_ctloutput = sockin_ctloutput,
116 }};
117 
118 struct domain sockindomain = {
119 	.dom_family = PF_INET,
120 	.dom_name = "socket_inet",
121 	.dom_init = sockin_init,
122 	.dom_externalize = NULL,
123 	.dom_dispose = NULL,
124 	.dom_protosw = sockinsw,
125 	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
126 	.dom_rtattach = rt_inithead,
127 	.dom_rtoffset = 32,
128 	.dom_maxrtkey = sizeof(struct sockaddr_in),
129 	.dom_ifattach = NULL,
130 	.dom_ifdetach = NULL,
131 	.dom_ifqueues = { NULL },
132 	.dom_link = { NULL },
133 	.dom_mowner = MOWNER_INIT("",""),
134 	.dom_rtcache = { NULL },
135 	.dom_sockaddr_cmp = NULL
136 };
137 struct domain sockin6domain = {
138 	.dom_family = PF_INET6,
139 	.dom_name = "socket_inet6",
140 	.dom_init = sockin_init,
141 	.dom_externalize = NULL,
142 	.dom_dispose = NULL,
143 	.dom_protosw = sockin6sw,
144 	.dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)],
145 	.dom_rtattach = rt_inithead,
146 	.dom_rtoffset = 32,
147 	.dom_maxrtkey = sizeof(struct sockaddr_in6),
148 	.dom_ifattach = NULL,
149 	.dom_ifdetach = NULL,
150 	.dom_ifqueues = { NULL },
151 	.dom_link = { NULL },
152 	.dom_mowner = MOWNER_INIT("",""),
153 	.dom_rtcache = { NULL },
154 	.dom_sockaddr_cmp = NULL
155 };
156 
/*
 * SO2S: fetch the descriptor that registersock() stashed in so_internal.
 * The argument is parenthesized so that any pointer expression (e.g. `&s`)
 * expands correctly.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))
/* Size requested for the send and receive buffers in sockin_attach(). */
#define SOCKIN_SBSIZE 65536
159 
160 struct sockin_unit {
161 	struct socket *su_so;
162 
163 	LIST_ENTRY(sockin_unit) su_entries;
164 };
165 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
166 static kmutex_t su_mtx;
167 static bool rebuild;
168 static int nsock;
169 
170 /* XXX: for the bpf hack */
171 static struct ifnet sockin_if;
172 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
173 
174 static int
175 registersock(struct socket *so, int news)
176 {
177 	struct sockin_unit *su;
178 
179 	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
180 	if (!su)
181 		return ENOMEM;
182 
183 	so->so_internal = (void *)(intptr_t)news;
184 	su->su_so = so;
185 
186 	mutex_enter(&su_mtx);
187 	LIST_INSERT_HEAD(&su_ent, su, su_entries);
188 	nsock++;
189 	rebuild = true;
190 	mutex_exit(&su_mtx);
191 
192 	return 0;
193 }
194 
195 static void
196 removesock(struct socket *so)
197 {
198 	struct sockin_unit *su_iter;
199 
200 	mutex_enter(&su_mtx);
201 	LIST_FOREACH(su_iter, &su_ent, su_entries) {
202 		if (su_iter->su_so == so)
203 			break;
204 	}
205 	if (!su_iter)
206 		panic("no such socket");
207 
208 	LIST_REMOVE(su_iter, su_entries);
209 	nsock--;
210 	rebuild = true;
211 	mutex_exit(&su_mtx);
212 
213 	rumpuser_close(SO2S(su_iter->su_so));
214 	kmem_free(su_iter, sizeof(*su_iter));
215 }
216 
217 static void
218 sockin_process(struct socket *so)
219 {
220 	struct sockaddr_in6 from;
221 	struct iovec io;
222 	struct msghdr rmsg;
223 	struct mbuf *m;
224 	size_t n, plen;
225 	int error;
226 
227 	m = m_gethdr(M_WAIT, MT_DATA);
228 	if (so->so_proto->pr_type == SOCK_DGRAM) {
229 		plen = IP_MAXPACKET;
230 		MEXTMALLOC(m, plen, M_DONTWAIT);
231 	} else {
232 		plen = MCLBYTES;
233 		MCLGET(m, M_DONTWAIT);
234 	}
235 	if ((m->m_flags & M_EXT) == 0) {
236 		m_freem(m);
237 		return;
238 	}
239 
240 	memset(&rmsg, 0, sizeof(rmsg));
241 	io.iov_base = mtod(m, void *);
242 	io.iov_len = plen;
243 	rmsg.msg_iov = &io;
244 	rmsg.msg_iovlen = 1;
245 	rmsg.msg_name = (struct sockaddr *)&from;
246 	rmsg.msg_namelen = sizeof(from);
247 
248 	error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n);
249 	if (error || n == 0) {
250 		m_freem(m);
251 
252 		/* Treat a TCP socket a goner */
253 		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
254 			mutex_enter(softnet_lock);
255 			soisdisconnected(so);
256 			mutex_exit(softnet_lock);
257 			removesock(so);
258 		}
259 		return;
260 	}
261 	m->m_len = m->m_pkthdr.len = n;
262 
263 	bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
264 
265 	mutex_enter(softnet_lock);
266 	if (so->so_proto->pr_type == SOCK_DGRAM) {
267 		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
268 			m_freem(m);
269 		}
270 	} else {
271 		sbappendstream(&so->so_rcv, m);
272 	}
273 
274 	sorwakeup(so);
275 	mutex_exit(softnet_lock);
276 }
277 
278 static void
279 sockin_accept(struct socket *so)
280 {
281 	struct socket *nso;
282 	struct sockaddr_in6 sin;
283 	int news, error, slen;
284 
285 	slen = sizeof(sin);
286 	error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
287 	    &slen, &news);
288 	if (error)
289 		return;
290 
291 	mutex_enter(softnet_lock);
292 	nso = sonewconn(so, true);
293 	if (nso == NULL)
294 		goto errout;
295 	if (registersock(nso, news) != 0)
296 		goto errout;
297 	mutex_exit(softnet_lock);
298 	return;
299 
300  errout:
301 	rumpuser_close(news);
302 	if (nso)
303 		soclose(nso);
304 	mutex_exit(softnet_lock);
305 }
306 
307 #define POLLTIMEOUT 100	/* check for new entries every 100ms */
308 
309 /* XXX: doesn't handle socket (kernel) locking properly? */
310 static void
311 sockinworker(void *arg)
312 {
313 	struct pollfd *pfds = NULL, *npfds;
314 	struct sockin_unit *su_iter;
315 	struct socket *so;
316 	int cursock = 0, i, rv, error;
317 
318 	/*
319 	 * Loop reading requests.  Check for new sockets periodically
320 	 * (could be smarter, but I'm lazy).
321 	 */
322 	for (;;) {
323 		if (rebuild) {
324 			npfds = NULL;
325 			mutex_enter(&su_mtx);
326 			if (nsock)
327 				npfds = kmem_alloc(nsock * sizeof(*npfds),
328 				    KM_NOSLEEP);
329 			if (npfds || nsock == 0) {
330 				if (pfds)
331 					kmem_free(pfds, cursock*sizeof(*pfds));
332 				pfds = npfds;
333 				cursock = nsock;
334 				rebuild = false;
335 
336 				i = 0;
337 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
338 					pfds[i].fd = SO2S(su_iter->su_so);
339 					pfds[i].events = POLLIN;
340 					pfds[i].revents = 0;
341 					i++;
342 				}
343 				KASSERT(i == nsock);
344 			}
345 			mutex_exit(&su_mtx);
346 		}
347 
348 		/* find affected sockets & process */
349 		error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv);
350 		for (i = 0; i < cursock && rv > 0 && error == 0; i++) {
351 			if (pfds[i].revents & POLLIN) {
352 				mutex_enter(&su_mtx);
353 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
354 					if (SO2S(su_iter->su_so)==pfds[i].fd) {
355 						so = su_iter->su_so;
356 						mutex_exit(&su_mtx);
357 						if(so->so_options&SO_ACCEPTCONN)
358 							sockin_accept(so);
359 						else
360 							sockin_process(so);
361 						mutex_enter(&su_mtx);
362 						break;
363 					}
364 				}
365 				/* if we can't find it, just wing it */
366 				KASSERT(rebuild || su_iter);
367 				mutex_exit(&su_mtx);
368 				pfds[i].revents = 0;
369 				rv--;
370 				i = -1;
371 				continue;
372 			}
373 
374 			/* something else?  ignore */
375 			if (pfds[i].revents) {
376 				pfds[i].revents = 0;
377 				rv--;
378 			}
379 		}
380 		KASSERT(rv <= 0);
381 	}
382 
383 }
384 
385 static int
386 sockin_do_init(void)
387 {
388 	int rv;
389 
390 	if (rump_threads) {
391 		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
392 		    NULL, NULL, "sockwork")) != 0)
393 			panic("sockin_init: could not create worker thread\n");
394 	} else {
395 		printf("sockin_init: no threads => no worker thread\n");
396 	}
397 	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
398 	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
399 	bpf_attach(&sockin_if, DLT_NULL, 0);
400 	return 0;
401 }
402 
403 static void
404 sockin_init(void)
405 {
406 	static ONCE_DECL(init);
407 
408 	RUN_ONCE(&init, sockin_do_init);
409 }
410 
411 static int
412 sockin_attach(struct socket *so, int proto)
413 {
414 	const int type = so->so_proto->pr_type;
415 	int error, news, family;
416 
417 	sosetlock(so);
418 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
419 		error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
420 		if (error)
421 			return error;
422 	}
423 
424 	family = so->so_proto->pr_domain->dom_family;
425 	KASSERT(family == PF_INET || family == PF_INET6);
426 	error = rumpcomp_sockin_socket(family, type, 0, &news);
427 	if (error)
428 		return error;
429 
430 	/* For UDP sockets, make sure we can send/recv maximum. */
431 	if (type == SOCK_DGRAM) {
432 		int sbsize = SOCKIN_SBSIZE;
433 		error = rumpcomp_sockin_setsockopt(news,
434 		    SOL_SOCKET, SO_SNDBUF,
435 		    &sbsize, sizeof(sbsize));
436 		sbsize = SOCKIN_SBSIZE;
437 		error = rumpcomp_sockin_setsockopt(news,
438 		    SOL_SOCKET, SO_RCVBUF,
439 		    &sbsize, sizeof(sbsize));
440 	}
441 
442 	if ((error = registersock(so, news)) != 0)
443 		rumpuser_close(news);
444 
445 	return error;
446 }
447 
/*
 * pr_detach hook.  Not implemented: reaching it panics.
 */
static void
sockin_detach(struct socket *so)
{

	panic("sockin_detach: IMPLEMENT ME\n");
}
453 
454 static int
455 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp)
456 {
457 	return ENOTTY;
458 }
459 
460 static int
461 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
462 	struct mbuf *control, struct lwp *l)
463 {
464 	int error = 0;
465 
466 	KASSERT(req != PRU_CONTROL);
467 
468 	switch (req) {
469 	case PRU_ACCEPT:
470 		/* we do all the work in the worker thread */
471 		break;
472 
473 	case PRU_BIND:
474 		error = rumpcomp_sockin_bind(SO2S(so),
475 		    mtod(nam, const struct sockaddr *),
476 		    nam->m_len);
477 		break;
478 
479 	case PRU_CONNECT:
480 		error = rumpcomp_sockin_connect(SO2S(so),
481 		    mtod(nam, struct sockaddr *), nam->m_len);
482 		if (error == 0)
483 			soisconnected(so);
484 		break;
485 
486 	case PRU_LISTEN:
487 		error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit);
488 		break;
489 
490 	case PRU_SEND:
491 	{
492 		struct sockaddr *saddr;
493 		struct msghdr mhdr;
494 		size_t iov_max, i;
495 		struct iovec iov_buf[32], *iov;
496 		struct mbuf *m2;
497 		size_t tot, n;
498 		int s;
499 
500 		bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
501 
502 		memset(&mhdr, 0, sizeof(mhdr));
503 
504 		iov_max = 0;
505 		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
506 			iov_max++;
507 		}
508 
509 		if (iov_max <= __arraycount(iov_buf)) {
510 			iov = iov_buf;
511 		} else {
512 			iov = kmem_alloc(sizeof(struct iovec) * iov_max,
513 			    KM_SLEEP);
514 		}
515 
516 		tot = 0;
517 		for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
518 			iov[i].iov_base = m2->m_data;
519 			iov[i].iov_len = m2->m_len;
520 			tot += m2->m_len;
521 		}
522 		mhdr.msg_iov = iov;
523 		mhdr.msg_iovlen = i;
524 		s = SO2S(so);
525 
526 		if (nam != NULL) {
527 			saddr = mtod(nam, struct sockaddr *);
528 			mhdr.msg_name = saddr;
529 			mhdr.msg_namelen = saddr->sa_len;
530 		}
531 
532 		rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n);
533 
534 		if (iov != iov_buf)
535 			kmem_free(iov, sizeof(struct iovec) * iov_max);
536 
537 		m_freem(m);
538 		m_freem(control);
539 
540 		/* this assumes too many things to list.. buthey, testing */
541 		if (!rump_threads)
542 			sockin_process(so);
543 	}
544 		break;
545 
546 	case PRU_SHUTDOWN:
547 		removesock(so);
548 		break;
549 
550 	case PRU_SOCKADDR:
551 	case PRU_PEERADDR:
552 	{
553 		int slen = nam->m_len;
554 		enum rumpcomp_sockin_getnametype which;
555 
556 		if (req == PRU_SOCKADDR)
557 			which = RUMPCOMP_SOCKIN_SOCKNAME;
558 		else
559 			which = RUMPCOMP_SOCKIN_PEERNAME;
560 		error = rumpcomp_sockin_getname(SO2S(so),
561 		    mtod(nam, struct sockaddr *), &slen, which);
562 		if (error == 0)
563 			nam->m_len = slen;
564 		break;
565 	}
566 
567 	default:
568 		panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
569 	}
570 
571 	return error;
572 }
573 
574 static int
575 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
576 {
577 
578 	return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
579 	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size);
580 }
581 
/*
 * Catch-all for interfaces this component does not provide.  The strong
 * aliases below make rtrequest, ifunit and ifreq_setaddr resolve to this
 * function, so calling any of them panics.
 */
int sockin_unavailable(void);

int
sockin_unavailable(void)
{

	panic("interface not available in with sockin");
}

__strong_alias(rtrequest,sockin_unavailable);
__strong_alias(ifunit,sockin_unavailable);
__strong_alias(ifreq_setaddr,sockin_unavailable);
591 __strong_alias(ifreq_setaddr,sockin_unavailable);
592