1 /* $NetBSD: sockin.c,v 1.42 2014/07/01 05:49:19 rtr Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.42 2014/07/01 05:49:19 rtr Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/once.h> 39 #include <sys/poll.h> 40 #include <sys/protosw.h> 41 #include <sys/queue.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/time.h> 45 46 #include <net/bpf.h> 47 #include <net/if.h> 48 #include <net/radix.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 54 #include <rump/rumpuser.h> 55 56 #include "rump_private.h" 57 #include "sockin_user.h" 58 59 /* 60 * An inet communication domain which uses the socket interface. 61 * Supports IPv4 & IPv6 UDP/TCP. 62 */ 63 64 DOMAIN_DEFINE(sockindomain); 65 DOMAIN_DEFINE(sockin6domain); 66 67 static int sockin_do_init(void); 68 static void sockin_init(void); 69 static int sockin_attach(struct socket *, int); 70 static void sockin_detach(struct socket *); 71 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *); 72 static int sockin_usrreq(struct socket *, int, struct mbuf *, 73 struct mbuf *, struct mbuf *, struct lwp *); 74 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 75 76 static const struct pr_usrreqs sockin_usrreqs = { 77 .pr_attach = sockin_attach, 78 .pr_detach = sockin_detach, 79 .pr_ioctl = sockin_ioctl, 80 .pr_generic = sockin_usrreq, 81 }; 82 83 const struct protosw sockinsw[] = { 84 { 85 .pr_type = SOCK_DGRAM, 86 .pr_domain = &sockindomain, 87 .pr_protocol = IPPROTO_UDP, 88 .pr_flags = PR_ATOMIC|PR_ADDR, 89 .pr_usrreqs = &sockin_usrreqs, 90 .pr_ctloutput = sockin_ctloutput, 91 }, 92 { 93 .pr_type = SOCK_STREAM, 94 .pr_domain = &sockindomain, 95 .pr_protocol = IPPROTO_TCP, 96 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 97 .pr_usrreqs = &sockin_usrreqs, 98 .pr_ctloutput = sockin_ctloutput, 99 }}; 100 const struct protosw sockin6sw[] = { 101 { 102 .pr_type = SOCK_DGRAM, 103 .pr_domain = &sockin6domain, 104 .pr_protocol = IPPROTO_UDP, 105 .pr_flags = PR_ATOMIC|PR_ADDR, 106 .pr_usrreqs = &sockin_usrreqs, 107 .pr_ctloutput = sockin_ctloutput, 108 }, 109 { 110 .pr_type = SOCK_STREAM, 111 .pr_domain = &sockin6domain, 112 .pr_protocol = IPPROTO_TCP, 113 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 114 .pr_usrreqs = &sockin_usrreqs, 115 .pr_ctloutput = sockin_ctloutput, 116 }}; 117 118 struct domain sockindomain = { 119 .dom_family = PF_INET, 120 .dom_name = "socket_inet", 121 .dom_init = sockin_init, 122 .dom_externalize = NULL, 123 .dom_dispose = NULL, 124 .dom_protosw = sockinsw, 125 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 126 .dom_rtattach = rt_inithead, 127 .dom_rtoffset = 32, 128 .dom_maxrtkey = sizeof(struct sockaddr_in), 129 .dom_ifattach = NULL, 130 .dom_ifdetach = NULL, 131 .dom_ifqueues = { NULL }, 132 .dom_link = { NULL }, 133 .dom_mowner = MOWNER_INIT("",""), 134 .dom_rtcache = { NULL }, 135 .dom_sockaddr_cmp = NULL 136 }; 137 struct domain sockin6domain = { 138 .dom_family = PF_INET6, 139 .dom_name = "socket_inet6", 140 .dom_init = sockin_init, 141 .dom_externalize = NULL, 142 .dom_dispose = NULL, 143 .dom_protosw = sockin6sw, 144 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)], 145 .dom_rtattach = rt_inithead, 146 .dom_rtoffset = 32, 147 .dom_maxrtkey = sizeof(struct sockaddr_in6), 148 .dom_ifattach = NULL, 149 .dom_ifdetach = NULL, 150 .dom_ifqueues = { NULL }, 151 .dom_link = { NULL }, 152 .dom_mowner = MOWNER_INIT("",""), 153 .dom_rtcache = { NULL }, 154 .dom_sockaddr_cmp = NULL 155 }; 156 157 #define SO2S(so) ((intptr_t)(so->so_internal)) 158 #define SOCKIN_SBSIZE 65536 159 160 struct sockin_unit { 161 struct socket *su_so; 162 163 LIST_ENTRY(sockin_unit) su_entries; 164 }; 165 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 166 static kmutex_t su_mtx; 167 static bool rebuild; 168 static int nsock; 169 170 /* XXX: for the bpf hack */ 171 static struct ifnet sockin_if; 172 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 173 174 static int 175 registersock(struct socket *so, int news) 176 { 177 struct sockin_unit *su; 178 179 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 180 if (!su) 181 return ENOMEM; 182 183 so->so_internal = (void *)(intptr_t)news; 184 su->su_so = so; 185 186 mutex_enter(&su_mtx); 187 LIST_INSERT_HEAD(&su_ent, su, su_entries); 188 nsock++; 189 rebuild = true; 190 mutex_exit(&su_mtx); 191 192 return 0; 193 } 194 195 static void 196 removesock(struct socket *so) 197 { 198 struct sockin_unit *su_iter; 199 200 mutex_enter(&su_mtx); 201 LIST_FOREACH(su_iter, &su_ent, su_entries) { 202 if (su_iter->su_so == so) 203 break; 204 } 205 if (!su_iter) 206 panic("no such socket"); 207 208 LIST_REMOVE(su_iter, su_entries); 209 nsock--; 210 rebuild = true; 211 mutex_exit(&su_mtx); 212 213 rumpuser_close(SO2S(su_iter->su_so)); 214 kmem_free(su_iter, sizeof(*su_iter)); 215 } 216 217 static void 218 sockin_process(struct socket *so) 219 { 220 struct sockaddr_in6 from; 221 struct iovec io; 222 struct msghdr rmsg; 223 struct mbuf *m; 224 size_t n, plen; 225 int error; 226 227 m = m_gethdr(M_WAIT, MT_DATA); 228 if (so->so_proto->pr_type == SOCK_DGRAM) { 229 plen = IP_MAXPACKET; 230 MEXTMALLOC(m, plen, M_DONTWAIT); 231 } else { 232 plen = MCLBYTES; 233 MCLGET(m, M_DONTWAIT); 234 } 235 if ((m->m_flags & M_EXT) == 0) { 236 m_freem(m); 237 return; 238 } 239 240 memset(&rmsg, 0, sizeof(rmsg)); 241 io.iov_base = mtod(m, void *); 242 io.iov_len = plen; 243 rmsg.msg_iov = &io; 244 rmsg.msg_iovlen = 1; 245 rmsg.msg_name = (struct sockaddr *)&from; 246 rmsg.msg_namelen = sizeof(from); 247 248 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n); 249 if (error || n == 0) { 250 m_freem(m); 251 252 /* Treat a TCP socket a goner */ 253 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 254 mutex_enter(softnet_lock); 255 soisdisconnected(so); 256 mutex_exit(softnet_lock); 257 removesock(so); 258 } 259 return; 260 } 261 m->m_len = m->m_pkthdr.len = n; 262 263 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 264 265 mutex_enter(softnet_lock); 266 if (so->so_proto->pr_type == SOCK_DGRAM) { 267 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 268 m_freem(m); 269 } 270 } else { 271 sbappendstream(&so->so_rcv, m); 272 } 273 274 sorwakeup(so); 275 mutex_exit(softnet_lock); 276 } 277 278 static void 279 sockin_accept(struct socket *so) 280 { 281 struct socket *nso; 282 struct sockaddr_in6 sin; 283 int news, error, slen; 284 285 slen = sizeof(sin); 286 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin, 287 &slen, &news); 288 if (error) 289 return; 290 291 mutex_enter(softnet_lock); 292 nso = sonewconn(so, true); 293 if (nso == NULL) 294 goto errout; 295 if (registersock(nso, news) != 0) 296 goto errout; 297 mutex_exit(softnet_lock); 298 return; 299 300 errout: 301 rumpuser_close(news); 302 if (nso) 303 soclose(nso); 304 mutex_exit(softnet_lock); 305 } 306 307 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 308 309 /* XXX: doesn't handle socket (kernel) locking properly? */ 310 static void 311 sockinworker(void *arg) 312 { 313 struct pollfd *pfds = NULL, *npfds; 314 struct sockin_unit *su_iter; 315 struct socket *so; 316 int cursock = 0, i, rv, error; 317 318 /* 319 * Loop reading requests. Check for new sockets periodically 320 * (could be smarter, but I'm lazy). 321 */ 322 for (;;) { 323 if (rebuild) { 324 npfds = NULL; 325 mutex_enter(&su_mtx); 326 if (nsock) 327 npfds = kmem_alloc(nsock * sizeof(*npfds), 328 KM_NOSLEEP); 329 if (npfds || nsock == 0) { 330 if (pfds) 331 kmem_free(pfds, cursock*sizeof(*pfds)); 332 pfds = npfds; 333 cursock = nsock; 334 rebuild = false; 335 336 i = 0; 337 LIST_FOREACH(su_iter, &su_ent, su_entries) { 338 pfds[i].fd = SO2S(su_iter->su_so); 339 pfds[i].events = POLLIN; 340 pfds[i].revents = 0; 341 i++; 342 } 343 KASSERT(i == nsock); 344 } 345 mutex_exit(&su_mtx); 346 } 347 348 /* find affected sockets & process */ 349 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv); 350 for (i = 0; i < cursock && rv > 0 && error == 0; i++) { 351 if (pfds[i].revents & POLLIN) { 352 mutex_enter(&su_mtx); 353 LIST_FOREACH(su_iter, &su_ent, su_entries) { 354 if (SO2S(su_iter->su_so)==pfds[i].fd) { 355 so = su_iter->su_so; 356 mutex_exit(&su_mtx); 357 if(so->so_options&SO_ACCEPTCONN) 358 sockin_accept(so); 359 else 360 sockin_process(so); 361 mutex_enter(&su_mtx); 362 break; 363 } 364 } 365 /* if we can't find it, just wing it */ 366 KASSERT(rebuild || su_iter); 367 mutex_exit(&su_mtx); 368 pfds[i].revents = 0; 369 rv--; 370 i = -1; 371 continue; 372 } 373 374 /* something else? ignore */ 375 if (pfds[i].revents) { 376 pfds[i].revents = 0; 377 rv--; 378 } 379 } 380 KASSERT(rv <= 0); 381 } 382 383 } 384 385 static int 386 sockin_do_init(void) 387 { 388 int rv; 389 390 if (rump_threads) { 391 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 392 NULL, NULL, "sockwork")) != 0) 393 panic("sockin_init: could not create worker thread\n"); 394 } else { 395 printf("sockin_init: no threads => no worker thread\n"); 396 } 397 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 398 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 399 bpf_attach(&sockin_if, DLT_NULL, 0); 400 return 0; 401 } 402 403 static void 404 sockin_init(void) 405 { 406 static ONCE_DECL(init); 407 408 RUN_ONCE(&init, sockin_do_init); 409 } 410 411 static int 412 sockin_attach(struct socket *so, int proto) 413 { 414 const int type = so->so_proto->pr_type; 415 int error, news, family; 416 417 sosetlock(so); 418 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 419 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 420 if (error) 421 return error; 422 } 423 424 family = so->so_proto->pr_domain->dom_family; 425 KASSERT(family == PF_INET || family == PF_INET6); 426 error = rumpcomp_sockin_socket(family, type, 0, &news); 427 if (error) 428 return error; 429 430 /* For UDP sockets, make sure we can send/recv maximum. */ 431 if (type == SOCK_DGRAM) { 432 int sbsize = SOCKIN_SBSIZE; 433 error = rumpcomp_sockin_setsockopt(news, 434 SOL_SOCKET, SO_SNDBUF, 435 &sbsize, sizeof(sbsize)); 436 sbsize = SOCKIN_SBSIZE; 437 error = rumpcomp_sockin_setsockopt(news, 438 SOL_SOCKET, SO_RCVBUF, 439 &sbsize, sizeof(sbsize)); 440 } 441 442 if ((error = registersock(so, news)) != 0) 443 rumpuser_close(news); 444 445 return error; 446 } 447 448 static void 449 sockin_detach(struct socket *so) 450 { 451 panic("sockin_detach: IMPLEMENT ME\n"); 452 } 453 454 static int 455 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 456 { 457 return ENOTTY; 458 } 459 460 static int 461 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 462 struct mbuf *control, struct lwp *l) 463 { 464 int error = 0; 465 466 KASSERT(req != PRU_CONTROL); 467 468 switch (req) { 469 case PRU_ACCEPT: 470 /* we do all the work in the worker thread */ 471 break; 472 473 case PRU_BIND: 474 error = rumpcomp_sockin_bind(SO2S(so), 475 mtod(nam, const struct sockaddr *), 476 nam->m_len); 477 break; 478 479 case PRU_CONNECT: 480 error = rumpcomp_sockin_connect(SO2S(so), 481 mtod(nam, struct sockaddr *), nam->m_len); 482 if (error == 0) 483 soisconnected(so); 484 break; 485 486 case PRU_LISTEN: 487 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit); 488 break; 489 490 case PRU_SEND: 491 { 492 struct sockaddr *saddr; 493 struct msghdr mhdr; 494 size_t iov_max, i; 495 struct iovec iov_buf[32], *iov; 496 struct mbuf *m2; 497 size_t tot, n; 498 int s; 499 500 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 501 502 memset(&mhdr, 0, sizeof(mhdr)); 503 504 iov_max = 0; 505 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 506 iov_max++; 507 } 508 509 if (iov_max <= __arraycount(iov_buf)) { 510 iov = iov_buf; 511 } else { 512 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 513 KM_SLEEP); 514 } 515 516 tot = 0; 517 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 518 iov[i].iov_base = m2->m_data; 519 iov[i].iov_len = m2->m_len; 520 tot += m2->m_len; 521 } 522 mhdr.msg_iov = iov; 523 mhdr.msg_iovlen = i; 524 s = SO2S(so); 525 526 if (nam != NULL) { 527 saddr = mtod(nam, struct sockaddr *); 528 mhdr.msg_name = saddr; 529 mhdr.msg_namelen = saddr->sa_len; 530 } 531 532 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n); 533 534 if (iov != iov_buf) 535 kmem_free(iov, sizeof(struct iovec) * iov_max); 536 537 m_freem(m); 538 m_freem(control); 539 540 /* this assumes too many things to list.. buthey, testing */ 541 if (!rump_threads) 542 sockin_process(so); 543 } 544 break; 545 546 case PRU_SHUTDOWN: 547 removesock(so); 548 break; 549 550 case PRU_SOCKADDR: 551 case PRU_PEERADDR: 552 { 553 int slen = nam->m_len; 554 enum rumpcomp_sockin_getnametype which; 555 556 if (req == PRU_SOCKADDR) 557 which = RUMPCOMP_SOCKIN_SOCKNAME; 558 else 559 which = RUMPCOMP_SOCKIN_PEERNAME; 560 error = rumpcomp_sockin_getname(SO2S(so), 561 mtod(nam, struct sockaddr *), &slen, which); 562 if (error == 0) 563 nam->m_len = slen; 564 break; 565 } 566 567 default: 568 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req); 569 } 570 571 return error; 572 } 573 574 static int 575 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 576 { 577 578 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level, 579 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size); 580 } 581 582 int sockin_unavailable(void); 583 int 584 sockin_unavailable(void) 585 { 586 587 panic("interface not available in with sockin"); 588 } 589 __strong_alias(rtrequest,sockin_unavailable); 590 __strong_alias(ifunit,sockin_unavailable); 591 __strong_alias(ifreq_setaddr,sockin_unavailable); 592