/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/mbuf.h>
#include <sys/nlookup.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/un.h>
#include <sys/unpcb.h>
#include <sys/vnode.h>
#include <sys/kern_syscall.h>
#include <sys/taskqueue.h>

#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/socketvar2.h>
#include <sys/msgport2.h>

#define UNP_DETACHED		UNP_PRIVATE1
#define UNP_CONNECTING		UNP_PRIVATE2
#define UNP_DROPPED		UNP_PRIVATE3
#define UNP_MARKER		UNP_PRIVATE4

#define UNP_ISATTACHED(unp)	\
    ((unp) != NULL && ((unp)->unp_flags & UNP_DETACHED) == 0)

#ifdef INVARIANTS
#define UNP_ASSERT_TOKEN_HELD(unp) \
    ASSERT_LWKT_TOKEN_HELD(lwkt_token_pool_lookup((unp)))
#else	/* !INVARIANTS */
#define UNP_ASSERT_TOKEN_HELD(unp)
#endif	/* INVARIANTS */

struct unp_defdiscard {
	SLIST_ENTRY(unp_defdiscard) next;
	struct file *fp;
};
SLIST_HEAD(unp_defdiscard_list, unp_defdiscard);

TAILQ_HEAD(unpcb_qhead, unpcb);
struct unp_global_head {
	struct unpcb_qhead	list;
	int			count;
};

static	MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
static	unp_gen_t unp_gencnt;

static struct unp_global_head unp_stream_head;
static struct unp_global_head unp_dgram_head;
static struct unp_global_head unp_seqpkt_head;

static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
static struct taskqueue *unp_taskqueue;

static struct unp_defdiscard_list unp_defdiscard_head;
static struct spinlock unp_defdiscard_spin;
static struct task unp_defdiscard_task;

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t unp_ino = 1;		/* prototype for fake inode numbers */

static int	unp_attach (struct socket *, struct pru_attach_info *);
static void	unp_detach (struct unpcb *);
static int	unp_bind (struct unpcb *, struct sockaddr *, struct thread *);
static int	unp_connect (struct socket *, struct sockaddr *,
			struct thread *);
static void	unp_disconnect(struct unpcb *, int);
static void	unp_shutdown (struct unpcb *);
static void	unp_gc (void);
static int	unp_gc_clearmarks(struct file *, void *);
static int	unp_gc_checkmarks(struct file *, void *);
static int	unp_gc_checkrefs(struct file *, void *);
static int	unp_revoke_gc_check(struct file *, void *);
static void	unp_scan (struct mbuf *, void (*)(struct file *, void *),
			void *data);
static void	unp_mark (struct file *, void *data);
static void	unp_discard (struct file *, void *);
static int	unp_internalize (struct mbuf *, struct thread *);
static int	unp_listen (struct unpcb *, struct thread *);
static void	unp_fp_externalize(struct lwp *lp, struct file *fp, int fd);
static int	unp_find_lockref(struct sockaddr *nam, struct thread *td,
			short type, struct unpcb **unp_ret);
static int	unp_connect_pair(struct unpcb *unp, struct unpcb *unp2);
static void	unp_drop(struct unpcb *unp, int error);
static void	unp_defdiscard_taskfunc(void *, int);

/*
 * SMP Considerations:
 *
 *	Since unp_token will be automatically released upon execution of
 *	blocking code, we need to reference unp_conn before any possible
 *	blocking code to prevent it from being ripped out from behind our
 *	back.
 *
 *	Any adjustment to unp->unp_conn requires both the global unp_token
 *	AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
 *
 *	Any access to so_pcb to obtain unp requires the pool token for
 *	unp to be held.
 */

static __inline void
unp_reference(struct unpcb *unp)
{
	/* 0->1 transition will not work */
	KKASSERT(unp->unp_refcnt > 0);
	atomic_add_int(&unp->unp_refcnt, 1);
}

static __inline void
unp_free(struct unpcb *unp)
{
	KKASSERT(unp->unp_refcnt > 0);
	if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
		unp_detach(unp);
}

static __inline struct unpcb *
unp_getsocktoken(struct socket *so)
{
	struct unpcb *unp;

	/*
	 * The unp pointer is invalid until we verify that it is
	 * good by re-checking so_pcb AFTER obtaining the token.
	 */
	while ((unp = so->so_pcb) != NULL) {
		lwkt_getpooltoken(unp);
		if (unp == so->so_pcb)
			break;
		lwkt_relpooltoken(unp);
	}
	return unp;
}
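
/*
 * Illustrative sketch (not compiled): the caller pattern implied by the
 * SMP notes above.  A path that follows unp->unp_conn across a possible
 * blocking point takes a reference first, roughly:
 *
 *	lwkt_gettoken(&unp_token);
 *	unp = unp_getsocktoken(so);		(revalidates so->so_pcb)
 *	if (UNP_ISATTACHED(unp) && unp->unp_conn != NULL) {
 *		unp2 = unp->unp_conn;
 *		unp_reference(unp2);		(pin before blocking)
 *		... possibly blocking work on unp2 ...
 *		unp_free(unp2);
 *	}
 *	unp_reltoken(unp);
 *	lwkt_reltoken(&unp_token);
 */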

static __inline void
unp_reltoken(struct unpcb *unp)
{
	if (unp != NULL)
		lwkt_relpooltoken(unp);
}

static __inline void
unp_setflags(struct unpcb *unp, int flags)
{
	atomic_set_int(&unp->unp_flags, flags);
}

static __inline void
unp_clrflags(struct unpcb *unp, int flags)
{
	atomic_clear_int(&unp->unp_flags, flags);
}

static __inline struct unp_global_head *
unp_globalhead(short type)
{
	switch (type) {
	case SOCK_STREAM:
		return &unp_stream_head;
	case SOCK_DGRAM:
		return &unp_dgram_head;
	case SOCK_SEQPACKET:
		return &unp_seqpkt_head;
	default:
		panic("unknown socket type %d", type);
	}
}

/*
 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
 *	 will sofree() it when we return.
 */
static void
uipc_abort(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_setflags(unp, UNP_DETACHED);
		unp_drop(unp, ECONNABORTED);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_accept(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else {
		struct unpcb *unp2 = unp->unp_conn;

		/*
		 * Pass back name of connected socket, if it was bound and
		 * we are still connected (our peer may have closed already!).
		 */
		if (unp2 && unp2->unp_addr) {
			unp_reference(unp2);
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
			unp_free(unp2);
		} else {
			*msg->accept.nm_nam = dup_sockaddr(&sun_noname);
		}
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_attach(netmsg_t msg)
{
	int error;

	lwkt_gettoken(&unp_token);

	KASSERT(msg->base.nm_so->so_pcb == NULL, ("double unp attach"));
	error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);

	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_bind(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp))
		error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
	else
		error = EINVAL;

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect(netmsg_t msg)
{
	int error;

	error = unp_connect(msg->base.nm_so, msg->connect.nm_nam,
			    msg->connect.nm_td);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect2(netmsg_t msg)
{
	int error;

	error = unp_connect2(msg->connect2.nm_so1, msg->connect2.nm_so2);
	lwkt_replymsg(&msg->lmsg, error);
}

/* control is EOPNOTSUPP */

static void
uipc_detach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_setflags(unp, UNP_DETACHED);
		unp_drop(unp, 0);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_disconnect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (UNP_ISATTACHED(unp)) {
		unp_disconnect(unp, 0);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_listen(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp) || unp->unp_vnode == NULL)
		error = EINVAL;
	else
		error = unp_listen(unp, msg->listen.nm_td);

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_peeraddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = unp_getsocktoken(msg->base.nm_so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else if (unp->unp_conn && unp->unp_conn->unp_addr) {
		struct unpcb *unp2 = unp->unp_conn;

		unp_reference(unp2);
		*msg->peeraddr.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
		unp_free(unp2);
		error = 0;
	} else {
		/*
		 * XXX: It seems that this test always fails even when the
		 * connection is established, so this else clause is added
		 * as a workaround to return a PF_LOCAL sockaddr.
		 */
		*msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
		error = 0;
	}

	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_rcvd(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn == NULL)
			break;
		unp2 = unp->unp_conn;	/* protected by pool token */

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 *
		 * As in several places in this module we have to ref unp2
		 * to ensure that it does not get ripped out from under us
		 * if we block on the so2 token or in sowwakeup().
		 */
		so2 = unp2->unp_socket;
		unp_reference(unp2);
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax) {
			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);

			sowwakeup(so2);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		unp_free(unp2);
		break;
	default:
		panic("uipc_rcvd unknown socktype");
		/*NOTREACHED*/
	}
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
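
/*
 * Illustrative sketch (not compiled): the SSB_STOP flow control protocol
 * between uipc_send() below and uipc_rcvd() above.  The sender appends
 * mbufs straight into the peer's receive buffer and stops itself when the
 * peer looks full; the reader clears the flag once enough has drained:
 *
 *	sender (uipc_send):
 *		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
 *		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax)
 *			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
 *
 *	reader (uipc_rcvd):
 *		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
 *		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax) {
 *			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);
 *			sowwakeup(so2);
 *		}
 */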

/* pru_rcvoob is EOPNOTSUPP */

static void
uipc_send(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	struct mbuf *control;
	struct mbuf *m;
	int error = 0;

	control = msg->send.nm_control;
	m = msg->send.nm_m;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto release;
	}

	if (msg->send.nm_flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	wakeup_start_delayed();

	if (control && (error = unp_internalize(control, msg->send.nm_td)))
		goto release;

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (msg->send.nm_addr) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_find_lockref(msg->send.nm_addr,
			    msg->send.nm_td, so->so_type, &unp2);
			if (error)
				break;
			/*
			 * NOTE:
			 * unp2 is locked and referenced.
			 *
			 * We could unlock unp2 now, since it was checked
			 * and referenced.
			 */
			unp_reltoken(unp2);
		} else {
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			unp2 = unp->unp_conn;
			unp_reference(unp2);
		}
		/* NOTE: unp2 is referenced. */
		so2 = unp2->unp_socket;

		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;

		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
			sorwakeup(so2);
			m = NULL;
			control = NULL;
		} else {
			error = ENOBUFS;
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);

		unp_free(unp2);
		break;
	}

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if (unp->unp_conn == NULL) {
			if (msg->send.nm_addr) {
				error = unp_connect(so,
						    msg->send.nm_addr,
						    msg->send.nm_td);
				if (error)
					break;	/* XXX */
			}
			/*
			 * NOTE:
			 * unp_conn still could be NULL, even if the
			 * above unp_connect() succeeds; since the
			 * current unp's token could be released due
			 * to blocking operations after unp_conn is
			 * assigned.
			 */
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
		}
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		unp2 = unp->unp_conn;
		KASSERT(unp2 != NULL, ("unp is not connected"));
		so2 = unp2->unp_socket;

		unp_reference(unp2);

		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (control) {
			if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
				control = NULL;
				m = NULL;
			}
		} else if (so->so_type == SOCK_SEQPACKET) {
			sbappendrecord(&so2->so_rcv.sb, m);
			m = NULL;
		} else {
			sbappend(&so2->so_rcv.sb, m);
			m = NULL;
		}

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 */
		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax) {
			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		sorwakeup(so2);

		unp_free(unp2);
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
	 */
	if (msg->send.nm_flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0)
		unp_dispose(control);
release:
	unp_reltoken(unp);
	wakeup_end_delayed();

	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	lwkt_replymsg(&msg->lmsg, error);
}
710 */ 711 unp = unp_getsocktoken(so); 712 713 if (!UNP_ISATTACHED(unp)) { 714 error = EINVAL; 715 goto done; 716 } 717 718 sb->st_blksize = so->so_snd.ssb_hiwat; 719 sb->st_dev = NOUDEV; 720 if (unp->unp_ino == 0) { /* make up a non-zero inode number */ 721 unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1); 722 if (__predict_false(unp->unp_ino == 0)) 723 unp->unp_ino = atomic_fetchadd_long(&unp_ino, 1); 724 } 725 sb->st_ino = unp->unp_ino; 726 error = 0; 727 done: 728 unp_reltoken(unp); 729 lwkt_replymsg(&msg->lmsg, error); 730 } 731 732 static void 733 uipc_shutdown(netmsg_t msg) 734 { 735 struct socket *so; 736 struct unpcb *unp; 737 int error; 738 739 /* 740 * so_pcb is only modified with both the global and the unp 741 * pool token held. 742 */ 743 so = msg->base.nm_so; 744 unp = unp_getsocktoken(so); 745 746 if (UNP_ISATTACHED(unp)) { 747 socantsendmore(so); 748 unp_shutdown(unp); 749 error = 0; 750 } else { 751 error = EINVAL; 752 } 753 754 unp_reltoken(unp); 755 lwkt_replymsg(&msg->lmsg, error); 756 } 757 758 static void 759 uipc_sockaddr(netmsg_t msg) 760 { 761 struct unpcb *unp; 762 int error; 763 764 /* 765 * so_pcb is only modified with both the global and the unp 766 * pool token held. 767 */ 768 unp = unp_getsocktoken(msg->base.nm_so); 769 770 if (UNP_ISATTACHED(unp)) { 771 if (unp->unp_addr) { 772 *msg->sockaddr.nm_nam = 773 dup_sockaddr((struct sockaddr *)unp->unp_addr); 774 } 775 error = 0; 776 } else { 777 error = EINVAL; 778 } 779 780 unp_reltoken(unp); 781 lwkt_replymsg(&msg->lmsg, error); 782 } 783 784 struct pr_usrreqs uipc_usrreqs = { 785 .pru_abort = uipc_abort, 786 .pru_accept = uipc_accept, 787 .pru_attach = uipc_attach, 788 .pru_bind = uipc_bind, 789 .pru_connect = uipc_connect, 790 .pru_connect2 = uipc_connect2, 791 .pru_control = pr_generic_notsupp, 792 .pru_detach = uipc_detach, 793 .pru_disconnect = uipc_disconnect, 794 .pru_listen = uipc_listen, 795 .pru_peeraddr = uipc_peeraddr, 796 .pru_rcvd = uipc_rcvd, 797 .pru_rcvoob = pr_generic_notsupp, 798 .pru_send = uipc_send, 799 .pru_sense = uipc_sense, 800 .pru_shutdown = uipc_shutdown, 801 .pru_sockaddr = uipc_sockaddr, 802 .pru_sosend = sosend, 803 .pru_soreceive = soreceive 804 }; 805 806 void 807 uipc_ctloutput(netmsg_t msg) 808 { 809 struct socket *so; 810 struct sockopt *sopt; 811 struct unpcb *unp; 812 int error = 0; 813 814 so = msg->base.nm_so; 815 sopt = msg->ctloutput.nm_sopt; 816 817 lwkt_gettoken(&unp_token); 818 unp = unp_getsocktoken(so); 819 820 if (!UNP_ISATTACHED(unp)) { 821 error = EINVAL; 822 goto done; 823 } 824 825 switch (sopt->sopt_dir) { 826 case SOPT_GET: 827 switch (sopt->sopt_name) { 828 case LOCAL_PEERCRED: 829 if (unp->unp_flags & UNP_HAVEPC) 830 soopt_from_kbuf(sopt, &unp->unp_peercred, 831 sizeof(unp->unp_peercred)); 832 else { 833 if (so->so_type == SOCK_STREAM) 834 error = ENOTCONN; 835 else if (so->so_type == SOCK_SEQPACKET) 836 error = ENOTCONN; 837 else 838 error = EINVAL; 839 } 840 break; 841 default: 842 error = EOPNOTSUPP; 843 break; 844 } 845 break; 846 case SOPT_SET: 847 default: 848 error = EOPNOTSUPP; 849 break; 850 } 851 852 done: 853 unp_reltoken(unp); 854 lwkt_reltoken(&unp_token); 855 856 lwkt_replymsg(&msg->lmsg, error); 857 } 858 859 /* 860 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 861 * for stream sockets, although the total for sender and receiver is 862 * actually only PIPSIZ. 863 * 864 * Datagram sockets really use the sendspace as the maximum datagram size, 865 * and don't really want to reserve the sendspace. 

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * We want the local send/recv space to be significantly larger than
 * lo0's mtu of 16384.
 */
#ifndef PIPSIZ
#define	PIPSIZ	57344
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int unp_rights;			/* file descriptors in flight */
static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin");

SYSCTL_DECL(_net_local_seqpacket);
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
    &unpst_sendspace, 0, "Size of stream socket send buffer");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpst_recvspace, 0, "Size of stream socket receive buffer");

SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
    &unpdg_sendspace, 0, "Max datagram socket size");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpdg_recvspace, 0, "Size of datagram socket receive buffer");

SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
    "File descriptors in flight");

static int
unp_attach(struct socket *so, struct pru_attach_info *ai)
{
	struct unp_global_head *head;
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);

	if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace,
					  ai->sb_rlimit);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
					  ai->sb_rlimit);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			goto failed;
	}

	/*
	 * In order to support sendfile we have to set either SSB_STOPSUPP
	 * or SSB_PREALLOC.  Unix domain sockets use the SSB_STOP flow
	 * control mechanism.
	 */
	if (so->so_type == SOCK_STREAM) {
		atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP);
		atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP);
	}

	unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
	if (unp == NULL) {
		error = ENOBUFS;
		goto failed;
	}
	unp->unp_refcnt = 1;
	unp->unp_gencnt = ++unp_gencnt;
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_rvnode = ai->fd_rdir;		/* jail cruft XXX JH */
	so->so_pcb = (caddr_t)unp;
	soreference(so);

	head = unp_globalhead(so->so_type);
	TAILQ_INSERT_TAIL(&head->list, unp, unp_link);
	head->count++;
	error = 0;
failed:
	lwkt_reltoken(&unp_token);
	return error;
}

static void
unp_detach(struct unpcb *unp)
{
	struct unp_global_head *head;
	struct socket *so;

	lwkt_gettoken(&unp_token);
	lwkt_getpooltoken(unp);

	so = unp->unp_socket;

	head = unp_globalhead(so->so_type);
	KASSERT(head->count > 0, ("invalid unp count"));
	TAILQ_REMOVE(&head->list, unp, unp_link);
	head->count--;

	unp->unp_gencnt = ++unp_gencnt;
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	soisdisconnected(so);
	soreference(so);		/* for delayed sorflush */
	KKASSERT(so->so_pcb == unp);
	so->so_pcb = NULL;		/* both tokens required */
	unp->unp_socket = NULL;
	sofree(so);			/* remove pcb ref */

	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later, in sofree,
		 * but if our receive buffer holds references to descriptors
		 * that are now garbage, we will dispose of those descriptor
		 * references after the garbage collector gets them
		 * (resulting in a "panic: closef: count < 0").
		 */
		sorflush(so);
		unp_gc();
	}
	sofree(so);
	lwkt_relpooltoken(unp);
	lwkt_reltoken(&unp_token);

	KASSERT(unp->unp_conn == NULL, ("unp is still connected"));
	KASSERT(LIST_EMPTY(&unp->unp_refs), ("unp still has references"));

	if (unp->unp_addr)
		kfree(unp->unp_addr, M_SONAME);
	kfree(unp, M_UNPCB);
}
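
/*
 * Illustrative userland sketch (not part of this file): the path that
 * exercises unp_bind() below.  The kernel creates a VSOCK vnode for the
 * name and fails with EADDRINUSE if it already exists, so servers
 * typically unlink() a stale name first:
 *
 *	struct sockaddr_un sun;
 *
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_LOCAL;
 *	strlcpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path));
 *	sun.sun_len = SUN_LEN(&sun);
 *	unlink(sun.sun_path);		(path is just an example)
 *	bind(fd, (struct sockaddr *)&sun, SUN_LEN(&sun));
 */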

static int
unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (unp->unp_vnode != NULL)
		return EINVAL;

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return EINVAL;
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE,
			     NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
		error = EADDRINUSE;
	if (error)
		goto done;

	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
	error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
	if (error == 0) {
		if (unp->unp_vnode == NULL) {
			vp->v_socket = unp->unp_socket;
			unp->unp_vnode = vp;
			unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
			vn_unlock(vp);
		} else {
			vput(vp);	/* late race */
			error = EINVAL;
		}
	}
done:
	nlookup_done(&nd);
	return (error);
}

static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct unpcb *unp, *unp2;
	int error, flags = 0;

	lwkt_gettoken(&unp_token);

	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto failed;
	}

	if ((unp->unp_flags & UNP_CONNECTING) || unp->unp_conn != NULL) {
		error = EISCONN;
		goto failed;
	}

	flags = UNP_CONNECTING;
	unp_setflags(unp, flags);

	error = unp_find_lockref(nam, td, so->so_type, &unp2);
	if (error)
		goto failed;
	/*
	 * NOTE:
	 * unp2 is locked and referenced.
	 */

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		struct socket *so2, *so3;
		struct unpcb *unp3;

		so2 = unp2->unp_socket;
		if (!(so2->so_options & SO_ACCEPTCONN) ||
		    (so3 = sonewconn_faddr(so2, 0, NULL,
					   TRUE /* keep ref */)) == NULL) {
			error = ECONNREFUSED;
			goto done;
		}
		/* so3 has a socket reference. */

		unp3 = unp_getsocktoken(so3);
		if (!UNP_ISATTACHED(unp3)) {
			unp_reltoken(unp3);
			/*
			 * Already aborted; we only need to drop the
			 * socket reference held by sonewconn_faddr().
			 */
			sofree(so3);
			error = ECONNREFUSED;
			goto done;
		}
		unp_reference(unp3);
		/*
		 * NOTE:
		 * unp3 is locked and referenced.
		 */

		/*
		 * Release so3 socket reference held by sonewconn_faddr().
		 * Since we have referenced unp3, neither unp3 nor so3 will
		 * be destroyed here.
		 */
		sofree(so3);

		if (unp2->unp_addr != NULL) {
			unp3->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)unp2->unp_addr);
		}

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_proc->p_ucred, &unp3->unp_peercred);
		unp_setflags(unp3, UNP_HAVEPC);
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp_setflags(unp, UNP_HAVEPC);

		error = unp_connect_pair(unp, unp3);
		if (error) {
			/* XXX we need a better name */
			soabort_oncpu(so3);
		}

		/* Done with unp3 */
		unp_free(unp3);
		unp_reltoken(unp3);
	} else {
		error = unp_connect_pair(unp, unp2);
	}
done:
	unp_free(unp2);
	unp_reltoken(unp2);
failed:
	if (flags)
		unp_clrflags(unp, flags);
	unp_reltoken(unp);

	lwkt_reltoken(&unp_token);
	return (error);
}

/*
 * Connect two unix domain sockets together.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp, *unp2;
	int error;

	lwkt_gettoken(&unp_token);
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	unp = unp_getsocktoken(so);
	unp2 = unp_getsocktoken(so2);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}
	if (!UNP_ISATTACHED(unp2)) {
		error = ECONNREFUSED;
		goto done;
	}

	if (unp->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}
	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
	    unp2->unp_conn != NULL) {
		error = EISCONN;
		goto done;
	}

	error = unp_connect_pair(unp, unp2);
done:
	unp_reltoken(unp2);
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);
	return (error);
}
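
/*
 * Illustrative userland sketch (not part of this file): socketpair(2)
 * is the usual way to reach unp_connect2() above, pairing two anonymous
 * local sockets without any filesystem name:
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sv) == 0)
 *		write(sv[0], "ping", 4);	(readable on sv[1])
 */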

/*
 * Disconnect a unix domain socket pair.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
static void
unp_disconnect(struct unpcb *unp, int error)
{
	struct socket *so = unp->unp_socket;
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	if (error)
		so->so_error = error;

	while ((unp2 = unp->unp_conn) != NULL) {
		lwkt_getpooltoken(unp2);
		if (unp2 == unp->unp_conn)
			break;
		lwkt_relpooltoken(unp2);
	}
	if (unp2 == NULL)
		return;
	/* unp2 is locked. */

	KASSERT((unp2->unp_flags & UNP_DROPPED) == 0, ("unp2 was dropped"));

	unp->unp_conn = NULL;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		soclrstate(so, SS_ISCONNECTED);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/*
		 * Keep a reference before clearing the unp_conn
		 * to avoid racing uipc_detach()/uipc_abort() in
		 * another thread.
		 */
		unp_reference(unp2);
		KASSERT(unp2->unp_conn == unp, ("unp_conn mismatch"));
		unp2->unp_conn = NULL;

		soisdisconnected(so);
		soisdisconnected(unp2->unp_socket);

		unp_free(unp2);
		break;
	}

	lwkt_relpooltoken(unp2);
}

#ifdef notdef
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif

static int
prison_unpcb(struct thread *td, struct unpcb *unp)
{
	struct proc *p;

	if (td == NULL)
		return (0);
	if ((p = td->td_proc) == NULL)
		return (0);
	if (!p->p_ucred->cr_prison)
		return (0);
	if (p->p_fd->fd_rdir == unp->unp_rvnode)
		return (0);
	return (1);
}

static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	struct unp_global_head *head = arg1;
	int error, i, n;
	struct unpcb *unp, *marker;

	KKASSERT(curproc != NULL);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = head->count;
		req->oldidx = (n + n/8) * sizeof(struct xunpcb);
		return 0;
	}

	if (req->newptr != NULL)
		return EPERM;

	marker = kmalloc(sizeof(*marker), M_UNPCB, M_WAITOK | M_ZERO);
	marker->unp_flags |= UNP_MARKER;

	lwkt_gettoken(&unp_token);

	n = head->count;
	i = 0;
	error = 0;

	TAILQ_INSERT_HEAD(&head->list, marker, unp_link);
	while ((unp = TAILQ_NEXT(marker, unp_link)) != NULL && i < n) {
		struct xunpcb xu;

		TAILQ_REMOVE(&head->list, marker, unp_link);
		TAILQ_INSERT_AFTER(&head->list, unp, marker, unp_link);

		if (unp->unp_flags & UNP_MARKER)
			continue;
		if (prison_unpcb(req->td, unp))
			continue;

		xu.xu_len = sizeof(xu);
		xu.xu_unpp = unp;

		/*
		 * NOTE:
		 * unp->unp_addr and unp->unp_conn are protected by
		 * unp_token.  So if we want to get rid of unp_token,
		 * or reduce the coverage of unp_token, care must be
		 * taken.
		 */
		if (unp->unp_addr) {
			bcopy(unp->unp_addr, &xu.xu_addr,
			    unp->unp_addr->sun_len);
		}
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			bcopy(unp->unp_conn->unp_addr,
			    &xu.xu_caddr,
			    unp->unp_conn->unp_addr->sun_len);
		}
		bcopy(unp, &xu.xu_unp, sizeof(*unp));
		sotoxsocket(unp->unp_socket, &xu.xu_socket);

		/* NOTE: This could block and temporarily release unp_token */
		error = SYSCTL_OUT(req, &xu, sizeof(xu));
		if (error)
			break;
		++i;
	}
	TAILQ_REMOVE(&head->list, marker, unp_link);

	lwkt_reltoken(&unp_token);

	kfree(marker, M_UNPCB);
	return error;
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    &unp_dgram_head, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    &unp_stream_head, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD,
	    &unp_seqpkt_head, 0, unp_pcblist, "S,xunpcb",
	    "List of active local seqpacket sockets");

static void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	if ((unp->unp_socket->so_type == SOCK_STREAM ||
	     unp->unp_socket->so_type == SOCK_SEQPACKET) &&
	    unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
		socantrcvmore(so);
	}
}

#ifdef notdef
void
unp_drain(void)
{
	lwkt_gettoken(&unp_token);
	lwkt_reltoken(&unp_token);
}
#endif
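
/*
 * Illustrative userland sketch (not part of this file): the marker-based
 * iteration in unp_pcblist() above backs sysctls such as
 * net.local.stream.pcblist, which tools like netstat(1) consume roughly
 * as follows:
 *
 *	size_t len = 0;
 *
 *	if (sysctlbyname("net.local.stream.pcblist",
 *	    NULL, &len, NULL, 0) == 0) {
 *		void *buf = malloc(len);
 *		sysctlbyname("net.local.stream.pcblist",
 *		    buf, &len, NULL, 0);
 *		(buf now holds an array of struct xunpcb)
 *	}
 */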

int
unp_externalize(struct mbuf *rights)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;		/* XXX */
	struct lwp *lp = td->td_lwp;
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int *fdp;
	int i;
	struct file **rp;
	struct file *fp;
	int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
		/ sizeof(struct file *);
	int f;

	lwkt_gettoken(&unp_token);

	/*
	 * If the new FDs will not fit, then we free them all.
	 */
	if (!fdavail(p, newfds)) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			fp = *rp;
			/*
			 * Zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = NULL;
			unp_discard(fp, NULL);
		}
		lwkt_reltoken(&unp_token);
		return (EMSGSIZE);
	}

	/*
	 * Now change each pointer to an fd in the global table to
	 * an integer that is the index to the local fd table entry
	 * that we set up to point to the global one we are transferring.
	 * If sizeof(struct file *) is bigger than or equal to sizeof(int),
	 * then do it in forward order.  In that case, an integer will
	 * always come in the same place or before its corresponding
	 * struct file pointer.
	 * If sizeof(struct file *) is smaller than sizeof(int), then
	 * do it in reverse order.
	 */
	if (sizeof(struct file *) >= sizeof(int)) {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f)) {
				int j;

				/*
				 * The previous fdavail() can't guarantee
				 * that fdalloc() will succeed, due to SMP
				 * races.  Just clean up and return the
				 * same error value as if fdavail() failed.
				 */

				/* Close externalized files */
				for (j = 0; j < i; j++)
					kern_close(fdp[j]);
				/* Discard the rest of internal files */
				for (; i < newfds; i++)
					unp_discard(rp[i], NULL);
				/* Wipe out the control message */
				for (i = 0; i < newfds; i++)
					rp[i] = NULL;

				lwkt_reltoken(&unp_token);
				return (EMSGSIZE);
			}
			fp = rp[i];
			unp_fp_externalize(lp, fp, f);
			fdp[i] = f;
		}
	} else {
		/*
		 * XXX
		 * Will this ever happen?  I don't think the compiler will
		 * generate code for this code segment -- sephe
		 */
		fdp = (int *)CMSG_DATA(cm) + newfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp--;
			unp_fp_externalize(lp, fp, f);
			*fdp-- = f;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differ.
	 */
	cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
	rights->m_len = cm->cmsg_len;

	lwkt_reltoken(&unp_token);
	return (0);
}

static void
unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
{
	lwkt_gettoken(&unp_token);

	if (lp) {
		KKASSERT(fd >= 0);
		if (fp->f_flag & FREVOKED) {
			struct file *fx;
			int error;

			kprintf("Warning: revoked fp exiting unix socket\n");
			error = falloc(lp, &fx, NULL);
			if (error == 0) {
				fsetfd(lp->lwp_proc->p_fd, fx, fd);
				fdrop(fx);
			} else {
				fsetfd(lp->lwp_proc->p_fd, NULL, fd);
			}
		} else {
			fsetfd(lp->lwp_proc->p_fd, fp, fd);
		}
	}
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	fdrop(fp);

	lwkt_reltoken(&unp_token);
}

void
unp_init(void)
{
	TAILQ_INIT(&unp_stream_head.list);
	TAILQ_INIT(&unp_dgram_head.list);
	TAILQ_INIT(&unp_seqpkt_head.list);

	spin_init(&unp_spin, "unpinit");

	SLIST_INIT(&unp_defdiscard_head);
	spin_init(&unp_defdiscard_spin, "unpdisc");
	TASK_INIT(&unp_defdiscard_task, 0, unp_defdiscard_taskfunc, NULL);

	/*
	 * Create taskqueue for deferred discard, and stick it to
	 * the last CPU.
	 */
	unp_taskqueue = taskqueue_create("unp_taskq", M_WAITOK,
	    taskqueue_thread_enqueue, &unp_taskqueue);
	taskqueue_start_threads(&unp_taskqueue, 1, TDPRI_KERN_DAEMON,
	    ncpus - 1, "unp taskq");
}

static int
unp_internalize(struct mbuf *control, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdescp;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp;
	struct file *fp;
	int i, fd, *fdp;
	struct cmsgcred *cmcred;
	int oldfds;
	u_int newlen;
	int error;

	KKASSERT(p);

	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
	    cm->cmsg_level != SOL_SOCKET ||
	    CMSG_ALIGN(cm->cmsg_len) != control->m_len)
		return EINVAL;

	/*
	 * Fill in credential information.
	 */
	if (cm->cmsg_type == SCM_CREDS) {
		cmcred = (struct cmsgcred *)CMSG_DATA(cm);
		cmcred->cmcred_pid = p->p_pid;
		cmcred->cmcred_uid = p->p_ucred->cr_ruid;
		cmcred->cmcred_gid = p->p_ucred->cr_rgid;
		cmcred->cmcred_euid = p->p_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
					     CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
		return 0;
	}

	/*
	 * cmsghdr may not be aligned, do not allow calculation(s) to
	 * go negative.
	 */
	if (cm->cmsg_len < CMSG_LEN(0))
		return EINVAL;

	oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(int);

	/*
	 * Now replace the integer FDs with pointers to
	 * the associated global file table entry..
	 * Allocate a bigger buffer as necessary; but if a cluster is not
	 * enough, return E2BIG.
	 */
	newlen = CMSG_LEN(oldfds * sizeof(struct file *));
	if (newlen > MCLBYTES)
		return E2BIG;
	if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
		if (control->m_flags & M_EXT)
			return E2BIG;
		MCLGET(control, M_WAITOK);
		if (!(control->m_flags & M_EXT))
			return ENOBUFS;

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
	}

	fdescp = p->p_fd;
	spin_lock_shared(&fdescp->fd_spin);

	/*
	 * Check that all the FDs passed in refer to legal OPEN files.
	 * If not, reject the entire operation.
	 */
	fdp = (int *)CMSG_DATA(cm);
	for (i = 0; i < oldfds; i++) {
		fd = *fdp++;
		if ((unsigned)fd >= fdescp->fd_nfiles ||
		    fdescp->fd_files[fd].fp == NULL) {
			error = EBADF;
			goto done;
		}
		if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
			error = EOPNOTSUPP;
			goto done;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differ.
	 */
	cm->cmsg_len = newlen;
	control->m_len = CMSG_ALIGN(newlen);

	/*
	 * Transform the file descriptors into struct file pointers.
	 * If sizeof(struct file *) is bigger than or equal to sizeof(int),
	 * then do it in reverse order so that the int won't get trashed
	 * until we're done.
	 * If sizeof(struct file *) is smaller than sizeof(int), then
	 * do it in forward order.
	 */
	if (sizeof(struct file *) >= sizeof(int)) {
		fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp--].fp;
			*rp-- = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	} else {
		/*
		 * XXX
		 * Will this ever happen?  I don't think the compiler will
		 * generate code for this code segment -- sephe
		 */
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp++].fp;
			*rp++ = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	}
	error = 0;
done:
	spin_unlock_shared(&fdescp->fd_spin);
	return error;
}
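
/*
 * Illustrative userland sketch (not part of this file): the sendmsg(2)
 * control-message layout that unp_internalize() above consumes.  The
 * kernel rewrites the int fd array into struct file pointers in-place,
 * growing the mbuf to a cluster if needed:
 *
 *	int fd_to_pass = ...;
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct msghdr mh = { 0 };
 *	struct cmsghdr *cm;
 *
 *	mh.msg_control = cbuf;
 *	mh.msg_controllen = sizeof(cbuf);
 *	cm = CMSG_FIRSTHDR(&mh);
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type = SCM_RIGHTS;
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &mh, 0);
 */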

/*
 * Garbage collect in-transit file descriptors that get lost due to
 * loops (i.e. when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

struct unp_gc_info {
	struct file **extra_ref;
	struct file *locked_fp;
	int defer;
	int index;
	int maxindex;
};

static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT deferred
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop, sleep a bit.  The complexity of the topology can
	 * cause multiple loops.  Also failure to acquire the socket's
	 * so_rcv token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights
	 * (unp_rights) is non-zero.  If during the sweep phase the gc code
	 * unp_discards, we end up doing a (full) closef on the descriptor.
	 * A closef on A results in the following chain.  Closef calls
	 * soo_close, which calls soclose.  Soclose calls first (through
	 * the switch uipc_usrreq) unp_detach, which re-invokes unp_gc.
	 * Unp_gc simply returns because the previous instance had set
	 * unp_gcing, and we return all the way back to soclose, which
	 * marks the socket with SS_NOFDREF, and then calls sofree.  Sofree
	 * calls sorflush to free up the rights that are queued in messages
	 * on the socket A, i.e., the reference on B.  The sorflush calls
	 * via the dom_dispose switch unp_dispose, which unp_scans with
	 * unp_discard.  This second instance of unp_discard just calls
	 * closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is
	 * already being closed, and the descriptor has already been marked
	 * with SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE,
				 M_WAITOK);
	info.maxindex = 256;

	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	unp_gcing = FALSE;
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}
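
/*
 * Illustrative sketch (not compiled): the mark-and-sweep structure that
 * unp_gc() and the three scan callbacks implement, in pseudo-C:
 *
 *	unp_gc():
 *		allfiles_scan(unp_gc_clearmarks);    (clear FMARK|FDEFER)
 *		do {
 *			allfiles_scan(unp_gc_checkmarks);    (mark phase)
 *		} while (some files were deferred);
 *		do {
 *			allfiles_scan(unp_gc_checkrefs);     (sweep phase)
 *			sorflush() and closef() each unreachable socket;
 *		} while (the hit list was full);
 */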

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * deferred or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * If it's not deferred, then check if it's
		 * already marked.. if so skip it
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it.  It's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * Either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was deferred).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here, if we cannot get the
	 * token non-blocking defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

#define REVOKE_GC_MAXFILES	32

struct unp_revoke_gc_info {
	struct file	*fx;
	struct file	*fary[REVOKE_GC_MAXFILES];
	int		fcount;
};

void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * WARNING: This routine is not allowed to block.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Dispose of the fp's stored in an mbuf.
 *
 * The dds loop can cause additional fps to be entered onto the
 * list while it is running, flattening out the operation and avoiding
 * a deep kernel stack recursion.
 */
void
unp_dispose(struct mbuf *m)
{
	lwkt_gettoken(&unp_token);
	if (m)
		unp_scan(m, unp_discard, NULL);
	lwkt_reltoken(&unp_token);
}

static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);

	KKASSERT(p);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp_setflags(unp, UNP_HAVEPCCACHED);
	return (0);
}

static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
				    sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Mark visibility.  info->defer is recalculated on every pass.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	} else if (fp->f_flag & FDEFER) {
		++info->defer;
	}
}

/*
 * Discard a fp previously held in a unix domain socket mbuf.  To
 * avoid blowing out the kernel stack due to contrived chain-reactions
 * we may have to defer the operation to a higher procedural level.
 *
 * Caller holds unp_token
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	struct unp_defdiscard *d;

	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);

	d = kmalloc(sizeof(*d), M_UNPCB, M_WAITOK);
	d->fp = fp;

	spin_lock(&unp_defdiscard_spin);
	SLIST_INSERT_HEAD(&unp_defdiscard_head, d, next);
	spin_unlock(&unp_defdiscard_spin);

	taskqueue_enqueue(unp_taskqueue, &unp_defdiscard_task);
}

static int
unp_find_lockref(struct sockaddr *nam, struct thread *td, short type,
    struct unpcb **unp_ret)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp = NULL;
	struct socket *so;
	struct unpcb *unp;
	int error, len;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	*unp_ret = NULL;

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		error = EINVAL;
		goto failed;
	}
	strncpy(buf, soun->sun_path, len);
	buf[len] = 0;

	error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error) {
		vp = NULL;
		goto failed;
	}

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto failed;
	}
	error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
	if (error)
		goto failed;
	so = vp->v_socket;
	if (so == NULL) {
		error = ECONNREFUSED;
		goto failed;
	}
	if (so->so_type != type) {
		error = EPROTOTYPE;
		goto failed;
	}

	/* Lock this unp. */
	unp = unp_getsocktoken(so);
	if (!UNP_ISATTACHED(unp)) {
		unp_reltoken(unp);
		error = ECONNREFUSED;
		goto failed;
	}
	/* And keep this unp referenced. */
	unp_reference(unp);

	/* Done! */
	*unp_ret = unp;
	error = 0;
failed:
	if (vp != NULL)
		vput(vp);
	return error;
}

static int
unp_connect_pair(struct unpcb *unp, struct unpcb *unp2)
{
	struct socket *so = unp->unp_socket;
	struct socket *so2 = unp2->unp_socket;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	UNP_ASSERT_TOKEN_HELD(unp2);

	KASSERT(so->so_type == so2->so_type,
	    ("socket type mismatch, so %d, so2 %d", so->so_type, so2->so_type));

	if (!UNP_ISATTACHED(unp))
		return EINVAL;
	if (!UNP_ISATTACHED(unp2))
		return ECONNREFUSED;

	KASSERT(unp->unp_conn == NULL, ("unp is already connected"));
	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		KASSERT(unp2->unp_conn == NULL, ("unp2 is already connected"));
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect_pair: unknown socket type %d", so->so_type);
	}
	return 0;
}

static void
unp_drop(struct unpcb *unp, int error)
{
	struct unpcb *unp2;

	ASSERT_LWKT_TOKEN_HELD(&unp_token);
	UNP_ASSERT_TOKEN_HELD(unp);
	KASSERT(unp->unp_flags & UNP_DETACHED, ("unp is not detached"));

	unp_disconnect(unp, error);

	while ((unp2 = LIST_FIRST(&unp->unp_refs)) != NULL) {
		lwkt_getpooltoken(unp2);
		unp_disconnect(unp2, ECONNRESET);
		lwkt_relpooltoken(unp2);
	}
	unp_setflags(unp, UNP_DROPPED);
}

static void
unp_defdiscard_taskfunc(void *arg __unused, int pending __unused)
{
	struct unp_defdiscard *d;

	spin_lock(&unp_defdiscard_spin);
	while ((d = SLIST_FIRST(&unp_defdiscard_head)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_defdiscard_head, next);
		spin_unlock(&unp_defdiscard_spin);

		closef(d->fp, NULL);
		kfree(d, M_UNPCB);

		spin_lock(&unp_defdiscard_spin);
	}
	spin_unlock(&unp_defdiscard_spin);
}