1 /* $NetBSD: sockin.c,v 1.35 2013/08/29 17:49:21 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.35 2013/08/29 17:49:21 rmind Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/once.h> 39 #include <sys/poll.h> 40 #include <sys/protosw.h> 41 #include <sys/queue.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/time.h> 45 46 #include <net/bpf.h> 47 #include <net/if.h> 48 #include <net/radix.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 54 #include <rump/rumpuser.h> 55 56 #include "rump_private.h" 57 #include "rumpcomp_user.h" 58 59 /* 60 * An inet communication domain which uses the socket interface. 61 * Currently supports only IPv4 UDP, but could easily be extended to 62 * support IPv6 and TCP by adding more stuff to the protosw. 63 */ 64 65 DOMAIN_DEFINE(sockindomain); 66 DOMAIN_DEFINE(sockin6domain); 67 68 static int sockin_do_init(void); 69 static void sockin_init(void); 70 static int sockin_usrreq(struct socket *, int, struct mbuf *, 71 struct mbuf *, struct mbuf *, struct lwp *); 72 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 73 74 const struct protosw sockinsw[] = { 75 { 76 .pr_type = SOCK_DGRAM, 77 .pr_domain = &sockindomain, 78 .pr_protocol = IPPROTO_UDP, 79 .pr_flags = PR_ATOMIC|PR_ADDR, 80 .pr_usrreq = sockin_usrreq, 81 .pr_ctloutput = sockin_ctloutput, 82 }, 83 { 84 .pr_type = SOCK_STREAM, 85 .pr_domain = &sockindomain, 86 .pr_protocol = IPPROTO_TCP, 87 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 88 .pr_usrreq = sockin_usrreq, 89 .pr_ctloutput = sockin_ctloutput, 90 }}; 91 const struct protosw sockin6sw[] = { 92 { 93 .pr_type = SOCK_DGRAM, 94 .pr_domain = &sockin6domain, 95 .pr_protocol = IPPROTO_UDP, 96 .pr_flags = PR_ATOMIC|PR_ADDR, 97 .pr_usrreq = sockin_usrreq, 98 .pr_ctloutput = sockin_ctloutput, 99 }, 100 { 101 .pr_type = SOCK_STREAM, 102 .pr_domain = &sockin6domain, 103 .pr_protocol = IPPROTO_TCP, 104 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 105 .pr_usrreq = sockin_usrreq, 106 .pr_ctloutput = sockin_ctloutput, 107 }}; 108 109 struct domain sockindomain = { 110 .dom_family = PF_INET, 111 .dom_name = "socket_inet", 112 .dom_init = sockin_init, 113 .dom_externalize = NULL, 114 .dom_dispose = NULL, 115 .dom_protosw = sockinsw, 116 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 117 .dom_rtattach = rt_inithead, 118 .dom_rtoffset = 32, 119 .dom_maxrtkey = sizeof(struct sockaddr_in), 120 .dom_ifattach = NULL, 121 .dom_ifdetach = NULL, 122 .dom_ifqueues = { NULL }, 123 .dom_link = { NULL }, 124 .dom_mowner = MOWNER_INIT("",""), 125 .dom_rtcache = { NULL }, 126 .dom_sockaddr_cmp = NULL 127 }; 128 struct domain sockin6domain = { 129 .dom_family = PF_INET6, 130 .dom_name = "socket_inet6", 131 .dom_init = sockin_init, 132 .dom_externalize = NULL, 133 .dom_dispose = NULL, 134 .dom_protosw = sockin6sw, 135 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)], 136 .dom_rtattach = rt_inithead, 137 .dom_rtoffset = 32, 138 .dom_maxrtkey = sizeof(struct sockaddr_in6), 139 .dom_ifattach = NULL, 140 .dom_ifdetach = NULL, 141 .dom_ifqueues = { NULL }, 142 .dom_link = { NULL }, 143 .dom_mowner = MOWNER_INIT("",""), 144 .dom_rtcache = { NULL }, 145 .dom_sockaddr_cmp = NULL 146 }; 147 148 #define SO2S(so) ((intptr_t)(so->so_internal)) 149 #define SOCKIN_SBSIZE 65536 150 151 struct sockin_unit { 152 struct socket *su_so; 153 154 LIST_ENTRY(sockin_unit) su_entries; 155 }; 156 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 157 static kmutex_t su_mtx; 158 static bool rebuild; 159 static int nsock; 160 161 /* XXX: for the bpf hack */ 162 static struct ifnet sockin_if; 163 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 164 165 static int 166 registersock(struct socket *so, int news) 167 { 168 struct sockin_unit *su; 169 170 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 171 if (!su) 172 return ENOMEM; 173 174 so->so_internal = (void *)(intptr_t)news; 175 su->su_so = so; 176 177 mutex_enter(&su_mtx); 178 LIST_INSERT_HEAD(&su_ent, su, su_entries); 179 nsock++; 180 rebuild = true; 181 mutex_exit(&su_mtx); 182 183 return 0; 184 } 185 186 static void 187 removesock(struct socket *so) 188 { 189 struct sockin_unit *su_iter; 190 191 mutex_enter(&su_mtx); 192 LIST_FOREACH(su_iter, &su_ent, su_entries) { 193 if (su_iter->su_so == so) 194 break; 195 } 196 if (!su_iter) 197 panic("no such socket"); 198 199 LIST_REMOVE(su_iter, su_entries); 200 nsock--; 201 rebuild = true; 202 mutex_exit(&su_mtx); 203 204 rumpuser_close(SO2S(su_iter->su_so)); 205 kmem_free(su_iter, sizeof(*su_iter)); 206 } 207 208 static void 209 sockin_process(struct socket *so) 210 { 211 struct sockaddr_in6 from; 212 struct iovec io; 213 struct msghdr rmsg; 214 struct mbuf *m; 215 size_t n, plen; 216 int error; 217 218 m = m_gethdr(M_WAIT, MT_DATA); 219 if (so->so_proto->pr_type == SOCK_DGRAM) { 220 plen = IP_MAXPACKET; 221 MEXTMALLOC(m, plen, M_DONTWAIT); 222 } else { 223 plen = MCLBYTES; 224 MCLGET(m, M_DONTWAIT); 225 } 226 if ((m->m_flags & M_EXT) == 0) { 227 m_freem(m); 228 return; 229 } 230 231 memset(&rmsg, 0, sizeof(rmsg)); 232 io.iov_base = mtod(m, void *); 233 io.iov_len = plen; 234 rmsg.msg_iov = &io; 235 rmsg.msg_iovlen = 1; 236 rmsg.msg_name = (struct sockaddr *)&from; 237 rmsg.msg_namelen = sizeof(from); 238 239 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n); 240 if (error || n == 0) { 241 m_freem(m); 242 243 /* Treat a TCP socket a goner */ 244 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 245 mutex_enter(softnet_lock); 246 soisdisconnected(so); 247 mutex_exit(softnet_lock); 248 removesock(so); 249 } 250 return; 251 } 252 m->m_len = m->m_pkthdr.len = n; 253 254 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 255 256 mutex_enter(softnet_lock); 257 if (so->so_proto->pr_type == SOCK_DGRAM) { 258 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 259 m_freem(m); 260 } 261 } else { 262 sbappendstream(&so->so_rcv, m); 263 } 264 265 sorwakeup(so); 266 mutex_exit(softnet_lock); 267 } 268 269 static void 270 sockin_accept(struct socket *so) 271 { 272 struct socket *nso; 273 struct sockaddr_in6 sin; 274 int news, error, slen; 275 276 slen = sizeof(sin); 277 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin, 278 &slen, &news); 279 if (error) 280 return; 281 282 mutex_enter(softnet_lock); 283 nso = sonewconn(so, true); 284 if (nso == NULL) 285 goto errout; 286 if (registersock(nso, news) != 0) 287 goto errout; 288 mutex_exit(softnet_lock); 289 return; 290 291 errout: 292 rumpuser_close(news); 293 if (nso) 294 soclose(nso); 295 mutex_exit(softnet_lock); 296 } 297 298 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 299 300 /* XXX: doesn't handle socket (kernel) locking properly? */ 301 static void 302 sockinworker(void *arg) 303 { 304 struct pollfd *pfds = NULL, *npfds; 305 struct sockin_unit *su_iter; 306 struct socket *so; 307 int cursock = 0, i, rv, error; 308 309 /* 310 * Loop reading requests. Check for new sockets periodically 311 * (could be smarter, but I'm lazy). 312 */ 313 for (;;) { 314 if (rebuild) { 315 npfds = NULL; 316 mutex_enter(&su_mtx); 317 if (nsock) 318 npfds = kmem_alloc(nsock * sizeof(*npfds), 319 KM_NOSLEEP); 320 if (npfds || nsock == 0) { 321 if (pfds) 322 kmem_free(pfds, cursock*sizeof(*pfds)); 323 pfds = npfds; 324 cursock = nsock; 325 rebuild = false; 326 327 i = 0; 328 LIST_FOREACH(su_iter, &su_ent, su_entries) { 329 pfds[i].fd = SO2S(su_iter->su_so); 330 pfds[i].events = POLLIN; 331 pfds[i].revents = 0; 332 i++; 333 } 334 KASSERT(i == nsock); 335 } 336 mutex_exit(&su_mtx); 337 } 338 339 /* find affected sockets & process */ 340 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv); 341 for (i = 0; i < cursock && rv > 0 && error == 0; i++) { 342 if (pfds[i].revents & POLLIN) { 343 mutex_enter(&su_mtx); 344 LIST_FOREACH(su_iter, &su_ent, su_entries) { 345 if (SO2S(su_iter->su_so)==pfds[i].fd) { 346 so = su_iter->su_so; 347 mutex_exit(&su_mtx); 348 if(so->so_options&SO_ACCEPTCONN) 349 sockin_accept(so); 350 else 351 sockin_process(so); 352 mutex_enter(&su_mtx); 353 break; 354 } 355 } 356 /* if we can't find it, just wing it */ 357 KASSERT(rebuild || su_iter); 358 mutex_exit(&su_mtx); 359 pfds[i].revents = 0; 360 rv--; 361 i = -1; 362 continue; 363 } 364 365 /* something else? ignore */ 366 if (pfds[i].revents) { 367 pfds[i].revents = 0; 368 rv--; 369 } 370 } 371 KASSERT(rv <= 0); 372 } 373 374 } 375 376 static int 377 sockin_do_init(void) 378 { 379 int rv; 380 381 if (rump_threads) { 382 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 383 NULL, NULL, "sockwork")) != 0) 384 panic("sockin_init: could not create worker thread\n"); 385 } else { 386 printf("sockin_init: no threads => no worker thread\n"); 387 } 388 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 389 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 390 bpf_attach(&sockin_if, DLT_NULL, 0); 391 return 0; 392 } 393 394 static void 395 sockin_init(void) 396 { 397 static ONCE_DECL(init); 398 399 RUN_ONCE(&init, sockin_do_init); 400 } 401 402 static int 403 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 404 struct mbuf *control, struct lwp *l) 405 { 406 int error = 0; 407 408 switch (req) { 409 case PRU_ATTACH: 410 { 411 int news; 412 int sbsize; 413 int family; 414 415 sosetlock(so); 416 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 417 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 418 if (error) 419 break; 420 } 421 422 family = so->so_proto->pr_domain->dom_family; 423 KASSERT(family == PF_INET || family == PF_INET6); 424 error = rumpcomp_sockin_socket(family, 425 so->so_proto->pr_type, 0, &news); 426 if (error) 427 break; 428 429 /* for UDP sockets, make sure we can send&recv max */ 430 if (so->so_proto->pr_type == SOCK_DGRAM) { 431 sbsize = SOCKIN_SBSIZE; 432 error = rumpcomp_sockin_setsockopt(news, 433 SOL_SOCKET, SO_SNDBUF, 434 &sbsize, sizeof(sbsize)); 435 sbsize = SOCKIN_SBSIZE; 436 error = rumpcomp_sockin_setsockopt(news, 437 SOL_SOCKET, SO_RCVBUF, 438 &sbsize, sizeof(sbsize)); 439 } 440 441 if ((error = registersock(so, news)) != 0) 442 rumpuser_close(news); 443 444 break; 445 } 446 447 case PRU_ACCEPT: 448 /* we do all the work in the worker thread */ 449 break; 450 451 case PRU_BIND: 452 error = rumpcomp_sockin_bind(SO2S(so), 453 mtod(nam, const struct sockaddr *), 454 nam->m_len); 455 break; 456 457 case PRU_CONNECT: 458 error = rumpcomp_sockin_connect(SO2S(so), 459 mtod(nam, struct sockaddr *), nam->m_len); 460 if (error == 0) 461 soisconnected(so); 462 break; 463 464 case PRU_LISTEN: 465 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit); 466 break; 467 468 case PRU_SEND: 469 { 470 struct sockaddr *saddr; 471 struct msghdr mhdr; 472 size_t iov_max, i; 473 struct iovec iov_buf[32], *iov; 474 struct mbuf *m2; 475 size_t tot, n; 476 int s; 477 478 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 479 480 memset(&mhdr, 0, sizeof(mhdr)); 481 482 iov_max = 0; 483 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 484 iov_max++; 485 } 486 487 if (iov_max <= __arraycount(iov_buf)) { 488 iov = iov_buf; 489 } else { 490 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 491 KM_SLEEP); 492 } 493 494 tot = 0; 495 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 496 iov[i].iov_base = m2->m_data; 497 iov[i].iov_len = m2->m_len; 498 tot += m2->m_len; 499 } 500 mhdr.msg_iov = iov; 501 mhdr.msg_iovlen = i; 502 s = SO2S(so); 503 504 if (nam != NULL) { 505 saddr = mtod(nam, struct sockaddr *); 506 mhdr.msg_name = saddr; 507 mhdr.msg_namelen = saddr->sa_len; 508 } 509 510 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n); 511 512 if (iov != iov_buf) 513 kmem_free(iov, sizeof(struct iovec) * iov_max); 514 515 m_freem(m); 516 m_freem(control); 517 518 /* this assumes too many things to list.. buthey, testing */ 519 if (!rump_threads) 520 sockin_process(so); 521 } 522 break; 523 524 case PRU_SHUTDOWN: 525 removesock(so); 526 break; 527 528 case PRU_SOCKADDR: 529 case PRU_PEERADDR: 530 { 531 int slen = nam->m_len; 532 enum rumpcomp_sockin_getnametype which; 533 534 if (req == PRU_SOCKADDR) 535 which = RUMPCOMP_SOCKIN_SOCKNAME; 536 else 537 which = RUMPCOMP_SOCKIN_PEERNAME; 538 error = rumpcomp_sockin_getname(SO2S(so), 539 mtod(nam, struct sockaddr *), &slen, which); 540 if (error == 0) 541 nam->m_len = slen; 542 break; 543 } 544 545 case PRU_CONTROL: 546 error = ENOTTY; 547 break; 548 549 default: 550 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req); 551 } 552 553 return error; 554 } 555 556 static int 557 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 558 { 559 560 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level, 561 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size); 562 } 563