xref: /netbsd-src/sys/rump/net/lib/libsockin/sockin.c (revision a4ddc2c8fb9af816efe3b1c375a5530aef0e89e9)
1 /*	$NetBSD: sockin.c,v 1.27 2013/03/18 13:14:11 pooka Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2009 Antti Kantee.  All Rights Reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.27 2013/03/18 13:14:11 pooka Exp $");
30 
31 #include <sys/param.h>
32 #include <sys/condvar.h>
33 #include <sys/domain.h>
34 #include <sys/kmem.h>
35 #include <sys/kthread.h>
36 #include <sys/mbuf.h>
37 #include <sys/mutex.h>
38 #include <sys/poll.h>
39 #include <sys/protosw.h>
40 #include <sys/queue.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/time.h>
44 
45 #include <net/bpf.h>
46 #include <net/if.h>
47 #include <net/radix.h>
48 
49 #include <netinet/in.h>
50 #include <netinet/in_systm.h>
51 #include <netinet/ip.h>
52 
53 #include <rump/rumpuser.h>
54 
55 #include "rump_private.h"
56 #include "rumpcomp_user.h"
57 
/*
 * An inet communication domain which uses the socket interface.
 * Currently supports IPv4 UDP and TCP, but could be extended to
 * support IPv6 by adding more stuff to the protosw.
 */
63 
/* sockindomain is initialized further down; the protosw refers to it. */
DOMAIN_DEFINE(sockindomain);

/* Forward declarations for the handlers wired into the tables below. */
static void	sockin_init(void);
static int	sockin_usrreq(struct socket *, int, struct mbuf *,
			      struct mbuf *, struct mbuf *, struct lwp *);
static int	sockin_ctloutput(int, struct socket *, struct sockopt *);
70 
71 const struct protosw sockinsw[] = {
72 {
73 	.pr_type = SOCK_DGRAM,
74 	.pr_domain = &sockindomain,
75 	.pr_protocol = IPPROTO_UDP,
76 	.pr_flags = PR_ATOMIC|PR_ADDR,
77 	.pr_usrreq = sockin_usrreq,
78 	.pr_ctloutput = sockin_ctloutput,
79 },
80 {
81 	.pr_type = SOCK_STREAM,
82 	.pr_domain = &sockindomain,
83 	.pr_protocol = IPPROTO_TCP,
84 	.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS,
85 	.pr_usrreq = sockin_usrreq,
86 	.pr_ctloutput = sockin_ctloutput,
87 }};
88 
89 struct domain sockindomain = {
90 	.dom_family = PF_INET,
91 	.dom_name = "socket_inet",
92 	.dom_init = sockin_init,
93 	.dom_externalize = NULL,
94 	.dom_dispose = NULL,
95 	.dom_protosw = sockinsw,
96 	.dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)],
97 	.dom_rtattach = rt_inithead,
98 	.dom_rtoffset = 32,
99 	.dom_maxrtkey = sizeof(struct sockaddr_in),
100 	.dom_ifattach = NULL,
101 	.dom_ifdetach = NULL,
102 	.dom_ifqueues = { NULL },
103 	.dom_link = { NULL },
104 	.dom_mowner = MOWNER_INIT("",""),
105 	.dom_rtcache = { NULL },
106 	.dom_sockaddr_cmp = NULL
107 };
108 
/*
 * SO2S: read back the host socket descriptor that registersock() stored
 * in so_internal.  The argument is parenthesized so that any pointer
 * expression (e.g. "p + 1") can be passed safely.
 */
#define SO2S(so) ((intptr_t)((so)->so_internal))

/* Send/receive buffer size reserved for sockets of this domain. */
#define SOCKIN_SBSIZE 65536
111 
112 struct sockin_unit {
113 	struct socket *su_so;
114 
115 	LIST_ENTRY(sockin_unit) su_entries;
116 };
117 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent);
118 static kmutex_t su_mtx;
119 static bool rebuild;
120 static int nsock;
121 
122 /* XXX: for the bpf hack */
123 static struct ifnet sockin_if;
124 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; }
125 
126 static int
127 registersock(struct socket *so, int news)
128 {
129 	struct sockin_unit *su;
130 
131 	su = kmem_alloc(sizeof(*su), KM_NOSLEEP);
132 	if (!su)
133 		return ENOMEM;
134 
135 	so->so_internal = (void *)(intptr_t)news;
136 	su->su_so = so;
137 
138 	mutex_enter(&su_mtx);
139 	LIST_INSERT_HEAD(&su_ent, su, su_entries);
140 	nsock++;
141 	rebuild = true;
142 	mutex_exit(&su_mtx);
143 
144 	return 0;
145 }
146 
147 static void
148 removesock(struct socket *so)
149 {
150 	struct sockin_unit *su_iter;
151 	int error;
152 
153 	mutex_enter(&su_mtx);
154 	LIST_FOREACH(su_iter, &su_ent, su_entries) {
155 		if (su_iter->su_so == so)
156 			break;
157 	}
158 	if (!su_iter)
159 		panic("no such socket");
160 
161 	LIST_REMOVE(su_iter, su_entries);
162 	nsock--;
163 	rebuild = true;
164 	mutex_exit(&su_mtx);
165 
166 	rumpuser_close(SO2S(su_iter->su_so), &error);
167 	kmem_free(su_iter, sizeof(*su_iter));
168 }
169 
170 static void
171 sockin_process(struct socket *so)
172 {
173 	struct sockaddr_in from;
174 	struct iovec io;
175 	struct msghdr rmsg;
176 	struct mbuf *m;
177 	ssize_t n;
178 	size_t plen;
179 	int error;
180 
181 	m = m_gethdr(M_WAIT, MT_DATA);
182 	if (so->so_proto->pr_type == SOCK_DGRAM) {
183 		plen = IP_MAXPACKET;
184 		MEXTMALLOC(m, plen, M_DONTWAIT);
185 	} else {
186 		plen = MCLBYTES;
187 		MCLGET(m, M_DONTWAIT);
188 	}
189 	if ((m->m_flags & M_EXT) == 0) {
190 		m_freem(m);
191 		return;
192 	}
193 
194 	memset(&rmsg, 0, sizeof(rmsg));
195 	io.iov_base = mtod(m, void *);
196 	io.iov_len = plen;
197 	rmsg.msg_iov = &io;
198 	rmsg.msg_iovlen = 1;
199 	rmsg.msg_name = (struct sockaddr *)&from;
200 	rmsg.msg_namelen = sizeof(from);
201 
202 	n = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &error);
203 	if (n <= 0) {
204 		m_freem(m);
205 
206 		/* Treat a TCP socket a goner */
207 		if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) {
208 			mutex_enter(softnet_lock);
209 			soisdisconnected(so);
210 			mutex_exit(softnet_lock);
211 			removesock(so);
212 		}
213 		return;
214 	}
215 	m->m_len = m->m_pkthdr.len = n;
216 
217 	bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
218 
219 	mutex_enter(softnet_lock);
220 	if (so->so_proto->pr_type == SOCK_DGRAM) {
221 		if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) {
222 			m_freem(m);
223 		}
224 	} else {
225 		sbappendstream(&so->so_rcv, m);
226 	}
227 
228 	sorwakeup(so);
229 	mutex_exit(softnet_lock);
230 }
231 
232 static void
233 sockin_accept(struct socket *so)
234 {
235 	struct socket *nso;
236 	struct sockaddr_in sin;
237 	int news, error, slen;
238 
239 	slen = sizeof(sin);
240 	news = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin,
241 	    &slen, &error);
242 	if (news == -1)
243 		return;
244 
245 	mutex_enter(softnet_lock);
246 	nso = sonewconn(so, SS_ISCONNECTED);
247 	if (nso == NULL)
248 		goto errout;
249 	if (registersock(nso, news) != 0)
250 		goto errout;
251 	mutex_exit(softnet_lock);
252 	return;
253 
254  errout:
255 	rumpuser_close(news, &error);
256 	if (nso)
257 		soclose(nso);
258 	mutex_exit(softnet_lock);
259 }
260 
261 #define POLLTIMEOUT 100	/* check for new entries every 100ms */
262 
263 /* XXX: doesn't handle socket (kernel) locking properly? */
264 static void
265 sockinworker(void *arg)
266 {
267 	struct pollfd *pfds = NULL, *npfds;
268 	struct sockin_unit *su_iter;
269 	struct socket *so;
270 	int cursock = 0, i, rv, error;
271 
272 	/*
273 	 * Loop reading requests.  Check for new sockets periodically
274 	 * (could be smarter, but I'm lazy).
275 	 */
276 	for (;;) {
277 		if (rebuild) {
278 			npfds = NULL;
279 			mutex_enter(&su_mtx);
280 			if (nsock)
281 				npfds = kmem_alloc(nsock * sizeof(*npfds),
282 				    KM_NOSLEEP);
283 			if (npfds || nsock == 0) {
284 				if (pfds)
285 					kmem_free(pfds, cursock*sizeof(*pfds));
286 				pfds = npfds;
287 				cursock = nsock;
288 				rebuild = false;
289 
290 				i = 0;
291 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
292 					pfds[i].fd = SO2S(su_iter->su_so);
293 					pfds[i].events = POLLIN;
294 					pfds[i].revents = 0;
295 					i++;
296 				}
297 				KASSERT(i == nsock);
298 			}
299 			mutex_exit(&su_mtx);
300 		}
301 
302 		/* find affected sockets & process */
303 		rv = rumpuser_poll(pfds, cursock, POLLTIMEOUT, &error);
304 		for (i = 0; i < cursock && rv > 0; i++) {
305 			if (pfds[i].revents & POLLIN) {
306 				mutex_enter(&su_mtx);
307 				LIST_FOREACH(su_iter, &su_ent, su_entries) {
308 					if (SO2S(su_iter->su_so)==pfds[i].fd) {
309 						so = su_iter->su_so;
310 						mutex_exit(&su_mtx);
311 						if(so->so_options&SO_ACCEPTCONN)
312 							sockin_accept(so);
313 						else
314 							sockin_process(so);
315 						mutex_enter(&su_mtx);
316 						break;
317 					}
318 				}
319 				/* if we can't find it, just wing it */
320 				KASSERT(rebuild || su_iter);
321 				mutex_exit(&su_mtx);
322 				pfds[i].revents = 0;
323 				rv--;
324 				i = -1;
325 				continue;
326 			}
327 
328 			/* something else?  ignore */
329 			if (pfds[i].revents) {
330 				pfds[i].revents = 0;
331 				rv--;
332 			}
333 		}
334 		KASSERT(rv <= 0);
335 	}
336 
337 }
338 
339 static void
340 sockin_init(void)
341 {
342 	int rv;
343 
344 	if (rump_threads) {
345 		if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker,
346 		    NULL, NULL, "sockwork")) != 0)
347 			panic("sockin_init: could not create worker thread\n");
348 	} else {
349 		printf("sockin_init: no threads => no worker thread\n");
350 	}
351 	mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE);
352 	strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname));
353 	bpf_attach(&sockin_if, DLT_NULL, 0);
354 }
355 
356 static int
357 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
358 	struct mbuf *control, struct lwp *l)
359 {
360 	int error = 0, rv;
361 
362 	switch (req) {
363 	case PRU_ATTACH:
364 	{
365 		int news, dummy;
366 		int sbsize;
367 
368 		sosetlock(so);
369 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
370 			error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE);
371 			if (error)
372 				break;
373 		}
374 
375 		news = rumpcomp_sockin_socket(PF_INET, so->so_proto->pr_type,
376 		    0, &error);
377 		if (news == -1)
378 			break;
379 
380 		/* for UDP sockets, make sure we can send&recv max */
381 		if (so->so_proto->pr_type == SOCK_DGRAM) {
382 			sbsize = SOCKIN_SBSIZE;
383 			rumpcomp_sockin_setsockopt(news,
384 			    SOL_SOCKET, SO_SNDBUF,
385 			    &sbsize, sizeof(sbsize), &error);
386 			sbsize = SOCKIN_SBSIZE;
387 			rumpcomp_sockin_setsockopt(news,
388 			    SOL_SOCKET, SO_RCVBUF,
389 			    &sbsize, sizeof(sbsize), &error);
390 		}
391 
392 		if ((error = registersock(so, news)) != 0)
393 			rumpuser_close(news, &dummy);
394 
395 		break;
396 	}
397 
398 	case PRU_ACCEPT:
399 		/* we do all the work in the worker thread */
400 		break;
401 
402 	case PRU_BIND:
403 		rumpcomp_sockin_bind(SO2S(so),
404 		    mtod(nam, const struct sockaddr *),
405 		    sizeof(struct sockaddr_in), &error);
406 		break;
407 
408 	case PRU_CONNECT:
409 		rv = rumpcomp_sockin_connect(SO2S(so),
410 		    mtod(nam, struct sockaddr *), sizeof(struct sockaddr_in),
411 		    &error);
412 		if (rv == 0)
413 			soisconnected(so);
414 		break;
415 
416 	case PRU_LISTEN:
417 		rumpcomp_sockin_listen(SO2S(so), so->so_qlimit, &error);
418 		break;
419 
420 	case PRU_SEND:
421 	{
422 		struct sockaddr *saddr;
423 		struct msghdr mhdr;
424 		size_t iov_max, i;
425 		struct iovec iov_buf[32], *iov;
426 		struct mbuf *m2;
427 		size_t tot;
428 		int s;
429 
430 		bpf_mtap_af(&sockin_if, AF_UNSPEC, m);
431 
432 		memset(&mhdr, 0, sizeof(mhdr));
433 
434 		iov_max = 0;
435 		for (m2 = m; m2 != NULL; m2 = m2->m_next) {
436 			iov_max++;
437 		}
438 
439 		if (iov_max <= __arraycount(iov_buf)) {
440 			iov = iov_buf;
441 		} else {
442 			iov = kmem_alloc(sizeof(struct iovec) * iov_max,
443 			    KM_SLEEP);
444 		}
445 
446 		tot = 0;
447 		for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) {
448 			iov[i].iov_base = m2->m_data;
449 			iov[i].iov_len = m2->m_len;
450 			tot += m2->m_len;
451 		}
452 		mhdr.msg_iov = iov;
453 		mhdr.msg_iovlen = i;
454 		s = SO2S(so);
455 
456 		if (nam != NULL) {
457 			saddr = mtod(nam, struct sockaddr *);
458 			mhdr.msg_name = saddr;
459 			mhdr.msg_namelen = saddr->sa_len;
460 		}
461 
462 		rumpcomp_sockin_sendmsg(s, &mhdr, 0, &error);
463 
464 		if (iov != iov_buf)
465 			kmem_free(iov, sizeof(struct iovec) * iov_max);
466 
467 		m_freem(m);
468 		m_freem(control);
469 
470 		/* this assumes too many things to list.. buthey, testing */
471 		if (!rump_threads)
472 			sockin_process(so);
473 	}
474 		break;
475 
476 	case PRU_SHUTDOWN:
477 		removesock(so);
478 		break;
479 
480 	case PRU_SOCKADDR:
481 	case PRU_PEERADDR:
482 	{
483 		int slen = nam->m_len;
484 		enum rumpcomp_sockin_getnametype which;
485 
486 		if (req == PRU_SOCKADDR)
487 			which = RUMPCOMP_SOCKIN_SOCKNAME;
488 		else
489 			which = RUMPCOMP_SOCKIN_PEERNAME;
490 		rumpcomp_sockin_getname(SO2S(so),
491 		    mtod(nam, struct sockaddr *), &slen, which, &error);
492 		if (error == 0)
493 			nam->m_len = slen;
494 		break;
495 	}
496 
497 	case PRU_CONTROL:
498 		error = ENOTTY;
499 		break;
500 
501 	default:
502 		panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req);
503 	}
504 
505 	return error;
506 }
507 
508 static int
509 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt)
510 {
511 	int error;
512 
513 	rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level,
514 	    sopt->sopt_name, sopt->sopt_data, sopt->sopt_size, &error);
515 	return error;
516 }
517