1 /* $NetBSD: sockin.c,v 1.67 2022/09/03 02:53:18 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 2008, 2009 Antti Kantee. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __KERNEL_RCSID(0, "$NetBSD: sockin.c,v 1.67 2022/09/03 02:53:18 thorpej Exp $"); 30 31 #include <sys/param.h> 32 #include <sys/condvar.h> 33 #include <sys/domain.h> 34 #include <sys/kmem.h> 35 #include <sys/kthread.h> 36 #include <sys/mbuf.h> 37 #include <sys/mutex.h> 38 #include <sys/once.h> 39 #include <sys/poll.h> 40 #include <sys/protosw.h> 41 #include <sys/queue.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/time.h> 45 46 #include <net/bpf.h> 47 #include <net/if.h> 48 #include <net/radix.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_systm.h> 52 #include <netinet/ip.h> 53 54 #include <rump-sys/kern.h> 55 56 #include <rump/rumpuser.h> 57 58 #include "sockin_user.h" 59 60 /* 61 * An inet communication domain which uses the socket interface. 62 * Supports IPv4 & IPv6 UDP/TCP. 63 */ 64 65 DOMAIN_DEFINE(sockindomain); 66 DOMAIN_DEFINE(sockin6domain); 67 68 static int sockin_do_init(void); 69 static void sockin_init(void); 70 static int sockin_attach(struct socket *, int); 71 static void sockin_detach(struct socket *); 72 static int sockin_accept(struct socket *, struct sockaddr *); 73 static int sockin_connect2(struct socket *, struct socket *); 74 static int sockin_bind(struct socket *, struct sockaddr *, struct lwp *); 75 static int sockin_listen(struct socket *, struct lwp *); 76 static int sockin_connect(struct socket *, struct sockaddr *, struct lwp *); 77 static int sockin_disconnect(struct socket *); 78 static int sockin_shutdown(struct socket *); 79 static int sockin_abort(struct socket *); 80 static int sockin_ioctl(struct socket *, u_long, void *, struct ifnet *); 81 static int sockin_stat(struct socket *, struct stat *); 82 static int sockin_peeraddr(struct socket *, struct sockaddr *); 83 static int sockin_sockaddr(struct socket *, struct sockaddr *); 84 static int sockin_rcvd(struct socket *, int, struct lwp *); 85 static int sockin_recvoob(struct socket *, struct mbuf *, int); 86 static int sockin_send(struct socket *, struct mbuf *, struct sockaddr *, 87 struct mbuf *, struct lwp *); 88 static int sockin_sendoob(struct socket *, struct mbuf *, struct mbuf *); 89 static int sockin_purgeif(struct socket *, struct ifnet *); 90 static int sockin_ctloutput(int op, struct socket *, struct sockopt *); 91 92 static const struct pr_usrreqs sockin_usrreqs = { 93 .pr_attach = sockin_attach, 94 .pr_detach = sockin_detach, 95 .pr_accept = sockin_accept, 96 .pr_bind = sockin_bind, 97 .pr_listen = sockin_listen, 98 .pr_connect = sockin_connect, 99 .pr_connect2 = sockin_connect2, 100 .pr_disconnect = sockin_disconnect, 101 .pr_shutdown = sockin_shutdown, 102 .pr_abort = sockin_abort, 103 .pr_ioctl = sockin_ioctl, 104 .pr_stat = sockin_stat, 105 .pr_peeraddr = sockin_peeraddr, 106 .pr_sockaddr = sockin_sockaddr, 107 .pr_rcvd = sockin_rcvd, 108 .pr_recvoob = sockin_recvoob, 109 .pr_send = sockin_send, 110 .pr_sendoob = sockin_sendoob, 111 .pr_purgeif = sockin_purgeif, 112 }; 113 114 const struct protosw sockinsw[] = { 115 { 116 .pr_type = SOCK_DGRAM, 117 .pr_domain = &sockindomain, 118 .pr_protocol = IPPROTO_UDP, 119 .pr_flags = PR_ATOMIC|PR_ADDR, 120 .pr_usrreqs = &sockin_usrreqs, 121 .pr_ctloutput = sockin_ctloutput, 122 }, 123 { 124 .pr_type = SOCK_STREAM, 125 .pr_domain = &sockindomain, 126 .pr_protocol = IPPROTO_TCP, 127 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 128 .pr_usrreqs = &sockin_usrreqs, 129 .pr_ctloutput = sockin_ctloutput, 130 }}; 131 const struct protosw sockin6sw[] = { 132 { 133 .pr_type = SOCK_DGRAM, 134 .pr_domain = &sockin6domain, 135 .pr_protocol = IPPROTO_UDP, 136 .pr_flags = PR_ATOMIC|PR_ADDR, 137 .pr_usrreqs = &sockin_usrreqs, 138 .pr_ctloutput = sockin_ctloutput, 139 }, 140 { 141 .pr_type = SOCK_STREAM, 142 .pr_domain = &sockin6domain, 143 .pr_protocol = IPPROTO_TCP, 144 .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_ABRTACPTDIS, 145 .pr_usrreqs = &sockin_usrreqs, 146 .pr_ctloutput = sockin_ctloutput, 147 }}; 148 149 struct domain sockindomain = { 150 .dom_family = PF_INET, 151 .dom_name = "socket_inet", 152 .dom_init = sockin_init, 153 .dom_externalize = NULL, 154 .dom_dispose = NULL, 155 .dom_protosw = sockinsw, 156 .dom_protoswNPROTOSW = &sockinsw[__arraycount(sockinsw)], 157 .dom_rtattach = rt_inithead, 158 .dom_rtoffset = 32, 159 .dom_maxrtkey = sizeof(struct sockaddr_in), 160 .dom_ifattach = NULL, 161 .dom_ifdetach = NULL, 162 .dom_link = { NULL }, 163 .dom_mowner = MOWNER_INIT("",""), 164 .dom_sockaddr_cmp = NULL 165 }; 166 struct domain sockin6domain = { 167 .dom_family = PF_INET6, 168 .dom_name = "socket_inet6", 169 .dom_init = sockin_init, 170 .dom_externalize = NULL, 171 .dom_dispose = NULL, 172 .dom_protosw = sockin6sw, 173 .dom_protoswNPROTOSW = &sockin6sw[__arraycount(sockin6sw)], 174 .dom_rtattach = rt_inithead, 175 .dom_rtoffset = 32, 176 .dom_maxrtkey = sizeof(struct sockaddr_in6), 177 .dom_ifattach = NULL, 178 .dom_ifdetach = NULL, 179 .dom_link = { NULL }, 180 .dom_mowner = MOWNER_INIT("",""), 181 .dom_sockaddr_cmp = NULL 182 }; 183 184 #define SO2S(so) ((intptr_t)(so->so_internal)) 185 #define SOCKIN_SBSIZE 65536 186 187 struct sockin_unit { 188 struct socket *su_so; 189 190 LIST_ENTRY(sockin_unit) su_entries; 191 }; 192 static LIST_HEAD(, sockin_unit) su_ent = LIST_HEAD_INITIALIZER(su_ent); 193 static kmutex_t su_mtx; 194 static bool rebuild; 195 static int nsock; 196 197 /* XXX: for the bpf hack */ 198 static struct ifnet sockin_if; 199 int ifpromisc(struct ifnet *ifp, int pswitch) { return 0; } 200 201 static int 202 registersock(struct socket *so, int news) 203 { 204 struct sockin_unit *su; 205 206 su = kmem_alloc(sizeof(*su), KM_NOSLEEP); 207 if (!su) 208 return ENOMEM; 209 210 so->so_internal = (void *)(intptr_t)news; 211 su->su_so = so; 212 213 mutex_enter(&su_mtx); 214 LIST_INSERT_HEAD(&su_ent, su, su_entries); 215 nsock++; 216 rebuild = true; 217 mutex_exit(&su_mtx); 218 219 return 0; 220 } 221 222 static void 223 removesock(struct socket *so) 224 { 225 struct sockin_unit *su_iter; 226 227 mutex_enter(&su_mtx); 228 LIST_FOREACH(su_iter, &su_ent, su_entries) { 229 if (su_iter->su_so == so) 230 break; 231 } 232 if (!su_iter) 233 panic("no such socket"); 234 235 LIST_REMOVE(su_iter, su_entries); 236 nsock--; 237 rebuild = true; 238 mutex_exit(&su_mtx); 239 240 rumpuser_close(SO2S(su_iter->su_so)); 241 kmem_free(su_iter, sizeof(*su_iter)); 242 } 243 244 static void 245 sockin_process(struct socket *so) 246 { 247 struct sockaddr_in6 from; 248 struct iovec io; 249 struct msghdr rmsg; 250 struct mbuf *m; 251 size_t n, plen; 252 int error; 253 254 m = m_gethdr(M_WAIT, MT_DATA); 255 if (so->so_proto->pr_type == SOCK_DGRAM) { 256 plen = IP_MAXPACKET; 257 MEXTMALLOC(m, plen, M_DONTWAIT); 258 } else { 259 plen = MCLBYTES; 260 MCLGET(m, M_DONTWAIT); 261 } 262 if ((m->m_flags & M_EXT) == 0) { 263 m_freem(m); 264 return; 265 } 266 267 memset(&rmsg, 0, sizeof(rmsg)); 268 io.iov_base = mtod(m, void *); 269 io.iov_len = plen; 270 rmsg.msg_iov = &io; 271 rmsg.msg_iovlen = 1; 272 rmsg.msg_name = (struct sockaddr *)&from; 273 rmsg.msg_namelen = sizeof(from); 274 275 error = rumpcomp_sockin_recvmsg(SO2S(so), &rmsg, 0, &n); 276 if (error || n == 0) { 277 m_freem(m); 278 279 /* Treat a TCP socket a goner */ 280 if (error != EAGAIN && so->so_proto->pr_type == SOCK_STREAM) { 281 mutex_enter(softnet_lock); 282 soisdisconnected(so); 283 mutex_exit(softnet_lock); 284 removesock(so); 285 } 286 return; 287 } 288 m->m_len = m->m_pkthdr.len = n; 289 290 bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_IN); 291 292 mutex_enter(softnet_lock); 293 if (so->so_proto->pr_type == SOCK_DGRAM) { 294 if (!sbappendaddr(&so->so_rcv, rmsg.msg_name, m, NULL)) { 295 m_freem(m); 296 } 297 } else { 298 sbappendstream(&so->so_rcv, m); 299 } 300 301 sorwakeup(so); 302 mutex_exit(softnet_lock); 303 } 304 305 static void 306 sockin_waccept(struct socket *so) 307 { 308 struct socket *nso; 309 struct sockaddr_in6 sin; 310 int news, error, slen; 311 312 slen = sizeof(sin); 313 error = rumpcomp_sockin_accept(SO2S(so), (struct sockaddr *)&sin, 314 &slen, &news); 315 if (error) 316 return; 317 318 mutex_enter(softnet_lock); 319 nso = sonewconn(so, true); 320 if (nso == NULL) 321 goto errout; 322 if (registersock(nso, news) != 0) 323 goto errout; 324 mutex_exit(softnet_lock); 325 return; 326 327 errout: 328 rumpuser_close(news); 329 if (nso) 330 soclose(nso); 331 mutex_exit(softnet_lock); 332 } 333 334 #define POLLTIMEOUT 100 /* check for new entries every 100ms */ 335 336 /* XXX: doesn't handle socket (kernel) locking properly? */ 337 static void 338 sockinworker(void *arg) 339 { 340 struct pollfd *pfds = NULL, *npfds; 341 struct sockin_unit *su_iter; 342 struct socket *so; 343 int cursock = 0, i, rv, error; 344 345 /* 346 * Loop reading requests. Check for new sockets periodically 347 * (could be smarter, but I'm lazy). 348 */ 349 for (;;) { 350 if (rebuild) { 351 npfds = NULL; 352 mutex_enter(&su_mtx); 353 if (nsock) 354 npfds = kmem_alloc(nsock * sizeof(*npfds), 355 KM_NOSLEEP); 356 if (npfds || nsock == 0) { 357 if (pfds) 358 kmem_free(pfds, cursock*sizeof(*pfds)); 359 pfds = npfds; 360 cursock = nsock; 361 rebuild = false; 362 363 i = 0; 364 LIST_FOREACH(su_iter, &su_ent, su_entries) { 365 pfds[i].fd = SO2S(su_iter->su_so); 366 pfds[i].events = POLLIN; 367 pfds[i].revents = 0; 368 i++; 369 } 370 KASSERT(i == nsock); 371 } 372 mutex_exit(&su_mtx); 373 } 374 375 /* find affected sockets & process */ 376 error = rumpcomp_sockin_poll(pfds, cursock, POLLTIMEOUT, &rv); 377 for (i = 0; i < cursock && rv > 0 && error == 0; i++) { 378 if (pfds[i].revents & POLLIN) { 379 mutex_enter(&su_mtx); 380 LIST_FOREACH(su_iter, &su_ent, su_entries) { 381 if (SO2S(su_iter->su_so)==pfds[i].fd) { 382 so = su_iter->su_so; 383 mutex_exit(&su_mtx); 384 if(so->so_options&SO_ACCEPTCONN) 385 sockin_waccept(so); 386 else 387 sockin_process(so); 388 mutex_enter(&su_mtx); 389 break; 390 } 391 } 392 /* if we can't find it, just wing it */ 393 KASSERT(rebuild || su_iter); 394 mutex_exit(&su_mtx); 395 pfds[i].revents = 0; 396 rv--; 397 i = -1; 398 continue; 399 } 400 401 /* something else? ignore */ 402 if (pfds[i].revents) { 403 pfds[i].revents = 0; 404 rv--; 405 } 406 } 407 KASSERT(rv <= 0); 408 } 409 410 } 411 412 static int 413 sockin_do_init(void) 414 { 415 int rv; 416 417 if (rump_threads) { 418 if ((rv = kthread_create(PRI_NONE, 0, NULL, sockinworker, 419 NULL, NULL, "sockwork")) != 0) 420 panic("sockin_init: could not create worker thread\n"); 421 } else { 422 printf("sockin_init: no threads => no worker thread\n"); 423 } 424 mutex_init(&su_mtx, MUTEX_DEFAULT, IPL_NONE); 425 strlcpy(sockin_if.if_xname, "sockin0", sizeof(sockin_if.if_xname)); 426 bpf_attach(&sockin_if, DLT_NULL, 0); 427 return 0; 428 } 429 430 static void 431 sockin_init(void) 432 { 433 static ONCE_DECL(init); 434 435 RUN_ONCE(&init, sockin_do_init); 436 } 437 438 static int 439 sockin_attach(struct socket *so, int proto) 440 { 441 const int type = so->so_proto->pr_type; 442 int error, news, family; 443 444 sosetlock(so); 445 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 446 error = soreserve(so, SOCKIN_SBSIZE, SOCKIN_SBSIZE); 447 if (error) 448 return error; 449 } 450 451 family = so->so_proto->pr_domain->dom_family; 452 KASSERT(family == PF_INET || family == PF_INET6); 453 error = rumpcomp_sockin_socket(family, type, 0, &news); 454 if (error) 455 return error; 456 457 /* For UDP sockets, make sure we can send/recv maximum. */ 458 if (type == SOCK_DGRAM) { 459 int sbsize = SOCKIN_SBSIZE; 460 error = rumpcomp_sockin_setsockopt(news, 461 SOL_SOCKET, SO_SNDBUF, 462 &sbsize, sizeof(sbsize)); 463 sbsize = SOCKIN_SBSIZE; 464 error = rumpcomp_sockin_setsockopt(news, 465 SOL_SOCKET, SO_RCVBUF, 466 &sbsize, sizeof(sbsize)); 467 } 468 469 if ((error = registersock(so, news)) != 0) 470 rumpuser_close(news); 471 472 return error; 473 } 474 475 static void 476 sockin_detach(struct socket *so) 477 { 478 panic("sockin_detach: IMPLEMENT ME\n"); 479 } 480 481 static int 482 sockin_accept(struct socket *so, struct sockaddr *nam) 483 { 484 KASSERT(solocked(so)); 485 486 /* we do all the work in the worker thread */ 487 return 0; 488 } 489 490 static int 491 sockin_bind(struct socket *so, struct sockaddr *nam, struct lwp *l) 492 { 493 KASSERT(solocked(so)); 494 KASSERT(nam != NULL); 495 496 return rumpcomp_sockin_bind(SO2S(so), nam, nam->sa_len); 497 } 498 499 static int 500 sockin_listen(struct socket *so, struct lwp *l) 501 { 502 KASSERT(solocked(so)); 503 504 return rumpcomp_sockin_listen(SO2S(so), so->so_qlimit); 505 } 506 507 static int 508 sockin_connect(struct socket *so, struct sockaddr *nam, struct lwp *l) 509 { 510 int error = 0; 511 512 KASSERT(solocked(so)); 513 KASSERT(nam != NULL); 514 515 error = rumpcomp_sockin_connect(SO2S(so), nam, nam->sa_len); 516 if (error == 0) 517 soisconnected(so); 518 519 return error; 520 } 521 522 static int 523 sockin_connect2(struct socket *so, struct socket *so2) 524 { 525 KASSERT(solocked(so)); 526 527 panic("sockin_connect2: IMPLEMENT ME, connect2 not supported"); 528 } 529 530 static int 531 sockin_disconnect(struct socket *so) 532 { 533 KASSERT(solocked(so)); 534 535 panic("sockin_disconnect: IMPLEMENT ME, disconnect not supported"); 536 } 537 538 static int 539 sockin_shutdown(struct socket *so) 540 { 541 KASSERT(solocked(so)); 542 543 removesock(so); 544 return 0; 545 } 546 547 static int 548 sockin_abort(struct socket *so) 549 { 550 KASSERT(solocked(so)); 551 552 panic("sockin_abort: IMPLEMENT ME, abort not supported"); 553 } 554 555 static int 556 sockin_ioctl(struct socket *so, u_long cmd, void *nam, struct ifnet *ifp) 557 { 558 return ENOTTY; 559 } 560 561 static int 562 sockin_stat(struct socket *so, struct stat *ub) 563 { 564 KASSERT(solocked(so)); 565 566 return 0; 567 } 568 569 static int 570 sockin_peeraddr(struct socket *so, struct sockaddr *nam) 571 { 572 KASSERT(solocked(so)); 573 574 int error = 0; 575 int slen = nam->sa_len; 576 577 error = rumpcomp_sockin_getname(SO2S(so), 578 nam, &slen, RUMPCOMP_SOCKIN_PEERNAME); 579 if (error == 0) 580 nam->sa_len = slen; 581 return error; 582 } 583 584 static int 585 sockin_sockaddr(struct socket *so, struct sockaddr *nam) 586 { 587 KASSERT(solocked(so)); 588 589 int error = 0; 590 int slen = nam->sa_len; 591 592 error = rumpcomp_sockin_getname(SO2S(so), 593 nam, &slen, RUMPCOMP_SOCKIN_SOCKNAME); 594 if (error == 0) 595 nam->sa_len = slen; 596 return error; 597 } 598 599 static int 600 sockin_rcvd(struct socket *so, int flags, struct lwp *l) 601 { 602 KASSERT(solocked(so)); 603 604 panic("sockin_rcvd: IMPLEMENT ME, rcvd not supported"); 605 } 606 607 static int 608 sockin_recvoob(struct socket *so, struct mbuf *m, int flags) 609 { 610 KASSERT(solocked(so)); 611 612 panic("sockin_recvoob: IMPLEMENT ME, recvoob not supported"); 613 } 614 615 static int 616 sockin_send(struct socket *so, struct mbuf *m, struct sockaddr *saddr, 617 struct mbuf *control, struct lwp *l) 618 { 619 struct msghdr mhdr; 620 size_t iov_max, i; 621 struct iovec iov_buf[32], *iov; 622 struct mbuf *m2; 623 size_t tot, n; 624 int error = 0; 625 int s; 626 627 bpf_mtap_af(&sockin_if, AF_UNSPEC, m, BPF_D_OUT); 628 629 memset(&mhdr, 0, sizeof(mhdr)); 630 631 iov_max = 0; 632 for (m2 = m; m2 != NULL; m2 = m2->m_next) { 633 iov_max++; 634 } 635 636 if (iov_max <= __arraycount(iov_buf)) { 637 iov = iov_buf; 638 } else { 639 iov = kmem_alloc(sizeof(struct iovec) * iov_max, 640 KM_SLEEP); 641 } 642 643 tot = 0; 644 for (i = 0, m2 = m; m2 != NULL; m2 = m2->m_next, i++) { 645 iov[i].iov_base = m2->m_data; 646 iov[i].iov_len = m2->m_len; 647 tot += m2->m_len; 648 } 649 mhdr.msg_iov = iov; 650 mhdr.msg_iovlen = i; 651 s = SO2S(so); 652 653 if (saddr != NULL) { 654 mhdr.msg_name = saddr; 655 mhdr.msg_namelen = saddr->sa_len; 656 } 657 658 rumpcomp_sockin_sendmsg(s, &mhdr, 0, &n); 659 660 if (iov != iov_buf) 661 kmem_free(iov, sizeof(struct iovec) * iov_max); 662 663 m_freem(m); 664 m_freem(control); 665 666 /* this assumes too many things to list.. buthey, testing */ 667 if (!rump_threads) 668 sockin_process(so); 669 670 return error; 671 } 672 673 static int 674 sockin_sendoob(struct socket *so, struct mbuf *m, struct mbuf *control) 675 { 676 KASSERT(solocked(so)); 677 678 panic("sockin_sendoob: IMPLEMENT ME, sendoob not supported"); 679 } 680 681 static int 682 sockin_purgeif(struct socket *so, struct ifnet *ifp) 683 { 684 685 panic("sockin_purgeif: IMPLEMENT ME, purgeif not supported"); 686 } 687 688 static int 689 sockin_ctloutput(int op, struct socket *so, struct sockopt *sopt) 690 { 691 692 return rumpcomp_sockin_setsockopt(SO2S(so), sopt->sopt_level, 693 sopt->sopt_name, sopt->sopt_data, sopt->sopt_size); 694 } 695 696 int sockin_unavailable(void); 697 int 698 sockin_unavailable(void) 699 { 700 701 panic("interface not available in with sockin"); 702 } 703 __strong_alias(rtrequest,sockin_unavailable); 704 __strong_alias(ifunit,sockin_unavailable); 705 __strong_alias(ifreq_setaddr,sockin_unavailable); 706 __strong_alias(rt_delete_matched_entries,sockin_unavailable); 707