1 /* $NetBSD: sockin.c,v 1.40 2014/05/19 02:51:25 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.40 2014/05/19 02:51:25 rmind Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/once.h> 39 #include <sys/poll.h> 40 #include <sys/protosw.h> 41 #include <sys/queue.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/time.h> 45 46 #include <net/bpf.h> 47 #include <net/if.h> 48 #include <net/radix.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 54 #include <rump/rumpuser.h> 55 56 #include "rump_private.h" 57 #include "sockin_user.h" 58 59 /* 60 * An inet communication domain which uses the socket interface. 61 * Supports IPv4 & IPv6 UDP/TCP. 62 */ 63 64 DOMAIN_DEFINE(sockindomain); 65 DOMAIN_DEFINE(sockin6domain); 66 67 static int sockin_do_init(void); 68 static void sockin_init(void); 69 static int sockin_attach(struct socket *, int); 70 static void sockin_detach(struct socket *); 71 static int sockin_usrreq(struct socket *, int, struct mbuf *, 72 struct mbuf *, struct mbuf *, struct lwp *); 73 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 74 75 static const struct pr_usrreqs sockin_usrreqs = { 76 .pr_attach = sockin_attach, 77 .pr_detach = sockin_detach, 78 .pr_generic = sockin_usrreq, 79 }; 80 81 const struct protosw sockinsw[] = { 82 { 83 .pr_type = SOCK_DGRAM, 84 .pr_domain = &sockindomain, 85 .pr_protocol = IPPROTO_UDP, 86 .pr_flags = PR_ATOMIC|PR_ADDR, 87 .pr_usrreqs = &sockin_usrreqs, 88 .pr_ctloutput = sockin_ctloutput, 89 }, 90 { 91 .pr_type = SOCK_STREAM, 92 .pr_domain = &sockindomain, 93 .pr_protocol = IPPROTO_TCP, 94 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 95 .pr_usrreqs = &sockin_usrreqs, 96 .pr_ctloutput = sockin_ctloutput, 97 }}; 98 const struct protosw sockin6sw[] = { 99 { 100 .pr_type = SOCK_DGRAM, 101 .pr_domain = &sockin6domain, 102 .pr_protocol = IPPROTO_UDP, 103 .pr_flags = PR_ATOMIC|PR_ADDR, 104 .pr_usrreqs = &sockin_usrreqs, 105 .pr_ctloutput = sockin_ctloutput, 106 }, 107 { 108 .pr_type = SOCK_STREAM, 109 .pr_domain = &sockin6domain, 110 .pr_protocol = IPPROTO_TCP, 111 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 112 .pr_usrreqs = &sockin_usrreqs, 113 .pr_ctloutput = sockin_ctloutput, 114 }}; 115 116 struct domain sockindomain = { 117 .dom_family = PF_INET, 118 .dom_name = "socket_inet", 119 .dom_init = sockin_init, 120 .dom_externalize = NULL, 121 .dom_dispose = NULL, 122 .dom_protosw = sockinsw, 123 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 124 .dom_rtattach = rt_inithead, 125 .dom_rtoffset = 32, 126 .dom_maxrtkey = sizeof(struct sockaddr_in), 127 .dom_ifattach = NULL, 128 .dom_ifdetach = NULL, 129 .dom_ifqueues = { NULL }, 130 .dom_link = { NULL }, 131 .dom_mowner = MOWNER_INIT("",""), 132 .dom_rtcache = { NULL }, 133 .dom_sockaddr_cmp = NULL 134 }; 135 struct domain sockin6domain = { 136 .dom_family = PF_INET6, 137 .dom_name = "socket_inet6", 138 .dom_init = sockin_init, 139 .dom_externalize = NULL, 140 .dom_dispose = NULL, 141 .dom_protosw = sockin6sw, 142 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)], 143 .dom_rtattach = rt_inithead, 144 .dom_rtoffset = 32, 145 .dom_maxrtkey = sizeof(struct sockaddr_in6), 146 .dom_ifattach = NULL, 147 .dom_ifdetach = NULL, 148 .dom_ifqueues = { NULL }, 149 .dom_link = { NULL }, 150 .dom_mowner = MOWNER_INIT("",""), 151 .dom_rtcache = { NULL }, 152 .dom_sockaddr_cmp = NULL 153 }; 154 155 #define SO2S(so) ((intptr_t)(so->so_internal)) 156 #define SOCKIN_SBSIZE 65536 157 158 struct sockin_unit { 159 struct socket *su_so; 160 161 LIST_ENTRY(sockin_unit) su_entries; 162 }; 163 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 164 static kmutex_t su_mtx; 165 static bool rebuild; 166 static int nsock; 167 168 /* XXX: for the bpf hack */ 169 static struct ifnet sockin_if; 170 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 171 172 static int 173 registersock(struct socket *so, int news) 174 { 175 struct sockin_unit *su; 176 177 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 178 if (!su) 179 return ENOMEM; 180 181 so->so_internal = (void *)(intptr_t)news; 182 su->su_so = so; 183 184 mutex_enter(&su_mtx); 185 LIST_INSERT_HEAD(&su_ent, su, su_entries); 186 nsock++; 187 rebuild = true; 188 mutex_exit(&su_mtx); 189 190 return 0; 191 } 192 193 static void 194 removesock(struct socket *so) 195 { 196 struct sockin_unit *su_iter; 197 198 mutex_enter(&su_mtx); 199 LIST_FOREACH(su_iter, &su_ent, su_entries) { 200 if (su_iter->su_so == so) 201 break; 202 } 203 if (!su_iter) 204 panic("no such socket"); 205 206 LIST_REMOVE(su_iter, su_entries); 207 nsock--; 208 rebuild = true; 209 mutex_exit(&su_mtx); 210 211 rumpuser_close(SO2S(su_iter->su_so)); 212 kmem_free(su_iter, sizeof(*su_iter)); 213 } 214 215 static void 216 sockin_process(struct socket *so) 217 { 218 struct sockaddr_in6 from; 219 struct iovec io; 220 struct msghdr rmsg; 221 struct mbuf *m; 222 size_t n, plen; 223 int error; 224 225 m = m_gethdr(M_WAIT, MT_DATA); 226 if (so->so_proto->pr_type == SOCK_DGRAM) { 227 plen = IP_MAXPACKET; 228 MEXTMALLOC(m, plen, M_DONTWAIT); 229 } else { 230 plen = MCLBYTES; 231 MCLGET(m, M_DONTWAIT); 232 } 233 if ((m->m_flags & M_EXT) == 0) { 234 m_freem(m); 235 return; 236 } 237 238 memset(&rmsg, 0, sizeof(rmsg)); 239 io.iov_base = mtod(m, void *); 240 io.iov_len = plen; 241 rmsg.msg_iov = &io; 242 rmsg.msg_iovlen = 1; 243 rmsg.msg_name = (struct sockaddr *)&from; 244 rmsg.msg_namelen = sizeof(from); 245 246 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n); 247 if (error || n == 0) { 248 m_freem(m); 249 250 /* Treat a TCP socket a goner */ 251 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 252 mutex_enter(softnet_lock); 253 soisdisconnected(so); 254 mutex_exit(softnet_lock); 255 removesock(so); 256 } 257 return; 258 } 259 m->m_len = m->m_pkthdr.len = n; 260 261 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 262 263 mutex_enter(softnet_lock); 264 if (so->so_proto->pr_type == SOCK_DGRAM) { 265 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 266 m_freem(m); 267 } 268 } else { 269 sbappendstream(&so->so_rcv, m); 270 } 271 272 sorwakeup(so); 273 mutex_exit(softnet_lock); 274 } 275 276 static void 277 sockin_accept(struct socket *so) 278 { 279 struct socket *nso; 280 struct sockaddr_in6 sin; 281 int news, error, slen; 282 283 slen = sizeof(sin); 284 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin, 285 &slen, &news); 286 if (error) 287 return; 288 289 mutex_enter(softnet_lock); 290 nso = sonewconn(so, true); 291 if (nso == NULL) 292 goto errout; 293 if (registersock(nso, news) != 0) 294 goto errout; 295 mutex_exit(softnet_lock); 296 return; 297 298 errout: 299 rumpuser_close(news); 300 if (nso) 301 soclose(nso); 302 mutex_exit(softnet_lock); 303 } 304 305 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 306 307 /* XXX: doesn't handle socket (kernel) locking properly? */ 308 static void 309 sockinworker(void *arg) 310 { 311 struct pollfd *pfds = NULL, *npfds; 312 struct sockin_unit *su_iter; 313 struct socket *so; 314 int cursock = 0, i, rv, error; 315 316 /* 317 * Loop reading requests. Check for new sockets periodically 318 * (could be smarter, but I'm lazy). 319 */ 320 for (;;) { 321 if (rebuild) { 322 npfds = NULL; 323 mutex_enter(&su_mtx); 324 if (nsock) 325 npfds = kmem_alloc(nsock * sizeof(*npfds), 326 KM_NOSLEEP); 327 if (npfds || nsock == 0) { 328 if (pfds) 329 kmem_free(pfds, cursock*sizeof(*pfds)); 330 pfds = npfds; 331 cursock = nsock; 332 rebuild = false; 333 334 i = 0; 335 LIST_FOREACH(su_iter, &su_ent, su_entries) { 336 pfds[i].fd = SO2S(su_iter->su_so); 337 pfds[i].events = POLLIN; 338 pfds[i].revents = 0; 339 i++; 340 } 341 KASSERT(i == nsock); 342 } 343 mutex_exit(&su_mtx); 344 } 345 346 /* find affected sockets & process */ 347 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv); 348 for (i = 0; i < cursock && rv > 0 && error == 0; i++) { 349 if (pfds[i].revents & POLLIN) { 350 mutex_enter(&su_mtx); 351 LIST_FOREACH(su_iter, &su_ent, su_entries) { 352 if (SO2S(su_iter->su_so)==pfds[i].fd) { 353 so = su_iter->su_so; 354 mutex_exit(&su_mtx); 355 if(so->so_options&SO_ACCEPTCONN) 356 sockin_accept(so); 357 else 358 sockin_process(so); 359 mutex_enter(&su_mtx); 360 break; 361 } 362 } 363 /* if we can't find it, just wing it */ 364 KASSERT(rebuild || su_iter); 365 mutex_exit(&su_mtx); 366 pfds[i].revents = 0; 367 rv--; 368 i = -1; 369 continue; 370 } 371 372 /* something else? ignore */ 373 if (pfds[i].revents) { 374 pfds[i].revents = 0; 375 rv--; 376 } 377 } 378 KASSERT(rv <= 0); 379 } 380 381 } 382 383 static int 384 sockin_do_init(void) 385 { 386 int rv; 387 388 if (rump_threads) { 389 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 390 NULL, NULL, "sockwork")) != 0) 391 panic("sockin_init: could not create worker thread\n"); 392 } else { 393 printf("sockin_init: no threads => no worker thread\n"); 394 } 395 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 396 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 397 bpf_attach(&sockin_if, DLT_NULL, 0); 398 return 0; 399 } 400 401 static void 402 sockin_init(void) 403 { 404 static ONCE_DECL(init); 405 406 RUN_ONCE(&init, sockin_do_init); 407 } 408 409 static int 410 sockin_attach(struct socket *so, int proto) 411 { 412 const int type = so->so_proto->pr_type; 413 int error, news, family; 414 415 sosetlock(so); 416 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 417 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 418 if (error) 419 return error; 420 } 421 422 family = so->so_proto->pr_domain->dom_family; 423 KASSERT(family == PF_INET || family == PF_INET6); 424 error = rumpcomp_sockin_socket(family, type, 0, &news); 425 if (error) 426 return error; 427 428 /* For UDP sockets, make sure we can send/recv maximum. */ 429 if (type == SOCK_DGRAM) { 430 int sbsize = SOCKIN_SBSIZE; 431 error = rumpcomp_sockin_setsockopt(news, 432 SOL_SOCKET, SO_SNDBUF, 433 &sbsize, sizeof(sbsize)); 434 sbsize = SOCKIN_SBSIZE; 435 error = rumpcomp_sockin_setsockopt(news, 436 SOL_SOCKET, SO_RCVBUF, 437 &sbsize, sizeof(sbsize)); 438 } 439 440 if ((error = registersock(so, news)) != 0) 441 rumpuser_close(news); 442 443 return error; 444 } 445 446 static void 447 sockin_detach(struct socket *so) 448 { 449 panic("sockin_detach: IMPLEMENT ME\n"); 450 } 451 452 static int 453 sockin_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 454 struct mbuf *control, struct lwp *l) 455 { 456 int error = 0; 457 458 switch (req) { 459 case PRU_ACCEPT: 460 /* we do all the work in the worker thread */ 461 break; 462 463 case PRU_BIND: 464 error = rumpcomp_sockin_bind(SO2S(so), 465 mtod(nam, const struct sockaddr *), 466 nam->m_len); 467 break; 468 469 case PRU_CONNECT: 470 error = rumpcomp_sockin_connect(SO2S(so), 471 mtod(nam, struct sockaddr *), nam->m_len); 472 if (error == 0) 473 soisconnected(so); 474 break; 475 476 case PRU_LISTEN: 477 error = rumpcomp_sockin_listen(SO2S(so), so->so_qlimit); 478 break; 479 480 case PRU_SEND: 481 { 482 struct sockaddr *saddr; 483 struct msghdr mhdr; 484 size_t iov_max, i; 485 struct iovec iov_buf[32], *iov; 486 struct mbuf *m2; 487 size_t tot, n; 488 int s; 489 490 bpf_mtap_af(&sockin_if, AF_UNSPEC, m); 491 492 memset(&mhdr, 0, sizeof(mhdr)); 493 494 iov_max = 0; 495 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 496 iov_max++; 497 } 498 499 if (iov_max <= __arraycount(iov_buf)) { 500 iov = iov_buf; 501 } else { 502 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 503 KM_SLEEP); 504 } 505 506 tot = 0; 507 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 508 iov[i].iov_base = m2->m_data; 509 iov[i].iov_len = m2->m_len; 510 tot += m2->m_len; 511 } 512 mhdr.msg_iov = iov; 513 mhdr.msg_iovlen = i; 514 s = SO2S(so); 515 516 if (nam != NULL) { 517 saddr = mtod(nam, struct sockaddr *); 518 mhdr.msg_name = saddr; 519 mhdr.msg_namelen = saddr->sa_len; 520 } 521 522 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n); 523 524 if (iov != iov_buf) 525 kmem_free(iov, sizeof(struct iovec) * iov_max); 526 527 m_freem(m); 528 m_freem(control); 529 530 /* this assumes too many things to list.. buthey, testing */ 531 if (!rump_threads) 532 sockin_process(so); 533 } 534 break; 535 536 case PRU_SHUTDOWN: 537 removesock(so); 538 break; 539 540 case PRU_SOCKADDR: 541 case PRU_PEERADDR: 542 { 543 int slen = nam->m_len; 544 enum rumpcomp_sockin_getnametype which; 545 546 if (req == PRU_SOCKADDR) 547 which = RUMPCOMP_SOCKIN_SOCKNAME; 548 else 549 which = RUMPCOMP_SOCKIN_PEERNAME; 550 error = rumpcomp_sockin_getname(SO2S(so), 551 mtod(nam, struct sockaddr *), &slen, which); 552 if (error == 0) 553 nam->m_len = slen; 554 break; 555 } 556 557 case PRU_CONTROL: 558 error = ENOTTY; 559 break; 560 561 default: 562 panic("sockin_usrreq: IMPLEMENT ME, req %d not supported", req); 563 } 564 565 return error; 566 } 567 568 static int 569 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 570 { 571 572 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level, 573 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size); 574 } 575 576 int sockin_unavailable(void); 577 int 578 sockin_unavailable(void) 579 { 580 581 panic("interface not available in with sockin"); 582 } 583 __strong_alias(rtrequest,sockin_unavailable); 584 __strong_alias(ifunit,sockin_unavailable); 585 __strong_alias(ifreq_setaddr,sockin_unavailable); 586