1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ 35 * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.44 2008/09/06 05:44:58 dillon Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/domain.h> 42 #include <sys/fcntl.h> 43 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 44 #include <sys/proc.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/mbuf.h> 48 #include <sys/nlookup.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/stat.h> 54 #include <sys/mount.h> 55 #include <sys/sysctl.h> 56 #include <sys/un.h> 57 #include <sys/unpcb.h> 58 #include <sys/vnode.h> 59 60 #include <sys/file2.h> 61 #include <sys/spinlock2.h> 62 #include <sys/socketvar2.h> 63 64 static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct"); 65 static unp_gen_t unp_gencnt; 66 static u_int unp_count; 67 68 static struct unp_head unp_shead, unp_dhead; 69 70 static struct lwkt_token unp_token = LWKT_TOKEN_MP_INITIALIZER(unp_token); 71 72 /* 73 * Unix communications domain. 
74 * 75 * TODO: 76 * RDM 77 * rethink name space problems 78 * need a proper out-of-band 79 * lock pushdown 80 */ 81 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 82 static ino_t unp_ino = 1; /* prototype for fake inode numbers */ 83 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin); 84 85 static int unp_attach (struct socket *, struct pru_attach_info *); 86 static void unp_detach (struct unpcb *); 87 static int unp_bind (struct unpcb *,struct sockaddr *, struct thread *); 88 static int unp_connect (struct socket *,struct sockaddr *, 89 struct thread *); 90 static void unp_disconnect (struct unpcb *); 91 static void unp_shutdown (struct unpcb *); 92 static void unp_drop (struct unpcb *, int); 93 static void unp_gc (void); 94 static int unp_gc_clearmarks(struct file *, void *); 95 static int unp_gc_checkmarks(struct file *, void *); 96 static int unp_gc_checkrefs(struct file *, void *); 97 static int unp_revoke_gc_check(struct file *, void *); 98 static void unp_scan (struct mbuf *, void (*)(struct file *, void *), 99 void *data); 100 static void unp_mark (struct file *, void *data); 101 static void unp_discard (struct file *, void *); 102 static int unp_internalize (struct mbuf *, struct thread *); 103 static int unp_listen (struct unpcb *, struct thread *); 104 static void unp_fp_externalize(struct lwp *lp, struct file *fp, int fd); 105 106 /* 107 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 108 * will sofree() it when we return. 
109 */ 110 static int 111 uipc_abort(struct socket *so) 112 { 113 struct unpcb *unp; 114 int error; 115 116 lwkt_gettoken(&unp_token); 117 unp = so->so_pcb; 118 if (unp) { 119 unp_drop(unp, ECONNABORTED); 120 unp_detach(unp); 121 error = 0; 122 } else { 123 error = EINVAL; 124 } 125 lwkt_reltoken(&unp_token); 126 127 return error; 128 } 129 130 static int 131 uipc_accept(struct socket *so, struct sockaddr **nam) 132 { 133 struct unpcb *unp; 134 135 lwkt_gettoken(&unp_token); 136 unp = so->so_pcb; 137 if (unp == NULL) { 138 lwkt_reltoken(&unp_token); 139 return EINVAL; 140 } 141 142 /* 143 * Pass back name of connected socket, 144 * if it was bound and we are still connected 145 * (our peer may have closed already!). 146 */ 147 if (unp->unp_conn && unp->unp_conn->unp_addr) { 148 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr); 149 } else { 150 *nam = dup_sockaddr((struct sockaddr *)&sun_noname); 151 } 152 lwkt_reltoken(&unp_token); 153 return 0; 154 } 155 156 static int 157 uipc_attach(struct socket *so, int proto, struct pru_attach_info *ai) 158 { 159 struct unpcb *unp; 160 int error; 161 162 lwkt_gettoken(&unp_token); 163 unp = so->so_pcb; 164 if (unp) 165 error = EISCONN; 166 else 167 error = unp_attach(so, ai); 168 lwkt_reltoken(&unp_token); 169 170 return error; 171 } 172 173 static int 174 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 175 { 176 struct unpcb *unp; 177 int error; 178 179 lwkt_gettoken(&unp_token); 180 unp = so->so_pcb; 181 if (unp) 182 error = unp_bind(unp, nam, td); 183 else 184 error = EINVAL; 185 lwkt_reltoken(&unp_token); 186 187 return error; 188 } 189 190 static int 191 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 192 { 193 struct unpcb *unp; 194 int error; 195 196 lwkt_gettoken(&unp_token); 197 unp = so->so_pcb; 198 if (unp) 199 error = unp_connect(so, nam, td); 200 else 201 error = EINVAL; 202 lwkt_reltoken(&unp_token); 203 204 return error; 205 } 206 207 static int 208 
uipc_connect2(struct socket *so1, struct socket *so2) 209 { 210 struct unpcb *unp; 211 int error; 212 213 lwkt_gettoken(&unp_token); 214 unp = so1->so_pcb; 215 if (unp) 216 error = unp_connect2(so1, so2); 217 else 218 error = EINVAL; 219 lwkt_reltoken(&unp_token); 220 221 return error; 222 } 223 224 /* control is EOPNOTSUPP */ 225 226 static int 227 uipc_detach(struct socket *so) 228 { 229 struct unpcb *unp; 230 int error; 231 232 lwkt_gettoken(&unp_token); 233 unp = so->so_pcb; 234 if (unp) { 235 unp_detach(unp); 236 error = 0; 237 } else { 238 error = EINVAL; 239 } 240 lwkt_reltoken(&unp_token); 241 242 return error; 243 } 244 245 static int 246 uipc_disconnect(struct socket *so) 247 { 248 struct unpcb *unp; 249 int error; 250 251 lwkt_gettoken(&unp_token); 252 unp = so->so_pcb; 253 if (unp) { 254 unp_disconnect(unp); 255 error = 0; 256 } else { 257 error = EINVAL; 258 } 259 lwkt_reltoken(&unp_token); 260 261 return error; 262 } 263 264 static int 265 uipc_listen(struct socket *so, struct thread *td) 266 { 267 struct unpcb *unp; 268 int error; 269 270 lwkt_gettoken(&unp_token); 271 unp = so->so_pcb; 272 if (unp == NULL || unp->unp_vnode == NULL) 273 error = EINVAL; 274 else 275 error = unp_listen(unp, td); 276 lwkt_reltoken(&unp_token); 277 278 return error; 279 } 280 281 static int 282 uipc_peeraddr(struct socket *so, struct sockaddr **nam) 283 { 284 struct unpcb *unp; 285 int error; 286 287 lwkt_gettoken(&unp_token); 288 unp = so->so_pcb; 289 if (unp == NULL) { 290 error = EINVAL; 291 } else if (unp->unp_conn && unp->unp_conn->unp_addr) { 292 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr); 293 error = 0; 294 } else { 295 /* 296 * XXX: It seems that this test always fails even when 297 * connection is established. So, this else clause is 298 * added as workaround to return PF_LOCAL sockaddr. 
299 */ 300 *nam = dup_sockaddr((struct sockaddr *)&sun_noname); 301 error = 0; 302 } 303 lwkt_reltoken(&unp_token); 304 305 return error; 306 } 307 308 static int 309 uipc_rcvd(struct socket *so, int flags) 310 { 311 struct unpcb *unp; 312 struct socket *so2; 313 314 lwkt_gettoken(&unp_token); 315 unp = so->so_pcb; 316 if (unp == NULL) { 317 lwkt_reltoken(&unp_token); 318 return EINVAL; 319 } 320 321 switch (so->so_type) { 322 case SOCK_DGRAM: 323 panic("uipc_rcvd DGRAM?"); 324 /*NOTREACHED*/ 325 case SOCK_STREAM: 326 case SOCK_SEQPACKET: 327 if (unp->unp_conn == NULL) 328 break; 329 /* 330 * Because we are transfering mbufs directly to the 331 * peer socket we have to use SSB_STOP on the sender 332 * to prevent it from building up infinite mbufs. 333 */ 334 so2 = unp->unp_conn->unp_socket; 335 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat && 336 so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax 337 ) { 338 atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP); 339 sowwakeup(so2); 340 } 341 break; 342 default: 343 panic("uipc_rcvd unknown socktype"); 344 /*NOTREACHED*/ 345 } 346 lwkt_reltoken(&unp_token); 347 348 return 0; 349 } 350 351 /* pru_rcvoob is EOPNOTSUPP */ 352 353 static int 354 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 355 struct mbuf *control, struct thread *td) 356 { 357 struct unpcb *unp; 358 struct socket *so2; 359 int error = 0; 360 361 lwkt_gettoken(&unp_token); 362 363 unp = so->so_pcb; 364 if (unp == NULL) { 365 error = EINVAL; 366 goto release; 367 } 368 if (flags & PRUS_OOB) { 369 error = EOPNOTSUPP; 370 goto release; 371 } 372 373 if (control && (error = unp_internalize(control, td))) 374 goto release; 375 376 switch (so->so_type) { 377 case SOCK_DGRAM: 378 { 379 struct sockaddr *from; 380 381 if (nam) { 382 if (unp->unp_conn) { 383 error = EISCONN; 384 break; 385 } 386 error = unp_connect(so, nam, td); 387 if (error) 388 break; 389 } else { 390 if (unp->unp_conn == NULL) { 391 error = ENOTCONN; 392 break; 393 } 
394 } 395 so2 = unp->unp_conn->unp_socket; 396 if (unp->unp_addr) 397 from = (struct sockaddr *)unp->unp_addr; 398 else 399 from = &sun_noname; 400 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) { 401 sorwakeup(so2); 402 m = NULL; 403 control = NULL; 404 } else { 405 error = ENOBUFS; 406 } 407 if (nam) 408 unp_disconnect(unp); 409 break; 410 } 411 412 case SOCK_STREAM: 413 case SOCK_SEQPACKET: 414 /* Connect if not connected yet. */ 415 /* 416 * Note: A better implementation would complain 417 * if not equal to the peer's address. 418 */ 419 if (!(so->so_state & SS_ISCONNECTED)) { 420 if (nam) { 421 error = unp_connect(so, nam, td); 422 if (error) 423 break; /* XXX */ 424 } else { 425 error = ENOTCONN; 426 break; 427 } 428 } 429 430 if (so->so_state & SS_CANTSENDMORE) { 431 error = EPIPE; 432 break; 433 } 434 if (unp->unp_conn == NULL) 435 panic("uipc_send connected but no connection?"); 436 so2 = unp->unp_conn->unp_socket; 437 /* 438 * Send to paired receive port, and then reduce 439 * send buffer hiwater marks to maintain backpressure. 440 * Wake up readers. 441 */ 442 if (control) { 443 if (ssb_appendcontrol(&so2->so_rcv, m, control)) { 444 control = NULL; 445 m = NULL; 446 } 447 } else if (so->so_type == SOCK_SEQPACKET) { 448 sbappendrecord(&so2->so_rcv.sb, m); 449 m = NULL; 450 } else { 451 sbappend(&so2->so_rcv.sb, m); 452 m = NULL; 453 } 454 455 /* 456 * Because we are transfering mbufs directly to the 457 * peer socket we have to use SSB_STOP on the sender 458 * to prevent it from building up infinite mbufs. 459 */ 460 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat || 461 so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax 462 ) { 463 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP); 464 } 465 sorwakeup(so2); 466 break; 467 468 default: 469 panic("uipc_send unknown socktype"); 470 } 471 472 /* 473 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN. 
474 */ 475 if (flags & PRUS_EOF) { 476 socantsendmore(so); 477 unp_shutdown(unp); 478 } 479 480 if (control && error != 0) 481 unp_dispose(control); 482 483 release: 484 lwkt_reltoken(&unp_token); 485 486 if (control) 487 m_freem(control); 488 if (m) 489 m_freem(m); 490 return error; 491 } 492 493 /* 494 * MPSAFE 495 */ 496 static int 497 uipc_sense(struct socket *so, struct stat *sb) 498 { 499 struct unpcb *unp; 500 501 lwkt_gettoken(&unp_token); 502 unp = so->so_pcb; 503 if (unp == NULL) { 504 lwkt_reltoken(&unp_token); 505 return EINVAL; 506 } 507 sb->st_blksize = so->so_snd.ssb_hiwat; 508 sb->st_dev = NOUDEV; 509 if (unp->unp_ino == 0) { /* make up a non-zero inode number */ 510 spin_lock(&unp_ino_spin); 511 unp->unp_ino = unp_ino++; 512 spin_unlock(&unp_ino_spin); 513 } 514 sb->st_ino = unp->unp_ino; 515 lwkt_reltoken(&unp_token); 516 517 return (0); 518 } 519 520 static int 521 uipc_shutdown(struct socket *so) 522 { 523 struct unpcb *unp; 524 int error; 525 526 lwkt_gettoken(&unp_token); 527 unp = so->so_pcb; 528 if (unp) { 529 socantsendmore(so); 530 unp_shutdown(unp); 531 error = 0; 532 } else { 533 error = EINVAL; 534 } 535 lwkt_reltoken(&unp_token); 536 537 return error; 538 } 539 540 static int 541 uipc_sockaddr(struct socket *so, struct sockaddr **nam) 542 { 543 struct unpcb *unp; 544 int error; 545 546 lwkt_gettoken(&unp_token); 547 unp = so->so_pcb; 548 if (unp) { 549 if (unp->unp_addr) 550 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr); 551 error = 0; 552 } else { 553 error = EINVAL; 554 } 555 lwkt_reltoken(&unp_token); 556 557 return error; 558 } 559 560 struct pr_usrreqs uipc_usrreqs = { 561 .pru_abort = uipc_abort, 562 .pru_accept = uipc_accept, 563 .pru_attach = uipc_attach, 564 .pru_bind = uipc_bind, 565 .pru_connect = uipc_connect, 566 .pru_connect2 = uipc_connect2, 567 .pru_control = pru_control_notsupp, 568 .pru_detach = uipc_detach, 569 .pru_disconnect = uipc_disconnect, 570 .pru_listen = uipc_listen, 571 .pru_peeraddr = 
uipc_peeraddr, 572 .pru_rcvd = uipc_rcvd, 573 .pru_rcvoob = pru_rcvoob_notsupp, 574 .pru_send = uipc_send, 575 .pru_sense = uipc_sense, 576 .pru_shutdown = uipc_shutdown, 577 .pru_sockaddr = uipc_sockaddr, 578 .pru_sosend = sosend, 579 .pru_soreceive = soreceive 580 }; 581 582 int 583 uipc_ctloutput(struct socket *so, struct sockopt *sopt) 584 { 585 struct unpcb *unp; 586 int error = 0; 587 588 lwkt_gettoken(&unp_token); 589 unp = so->so_pcb; 590 591 switch (sopt->sopt_dir) { 592 case SOPT_GET: 593 switch (sopt->sopt_name) { 594 case LOCAL_PEERCRED: 595 if (unp->unp_flags & UNP_HAVEPC) 596 soopt_from_kbuf(sopt, &unp->unp_peercred, 597 sizeof(unp->unp_peercred)); 598 else { 599 if (so->so_type == SOCK_STREAM) 600 error = ENOTCONN; 601 else if (so->so_type == SOCK_SEQPACKET) 602 error = ENOTCONN; 603 else 604 error = EINVAL; 605 } 606 break; 607 default: 608 error = EOPNOTSUPP; 609 break; 610 } 611 break; 612 case SOPT_SET: 613 default: 614 error = EOPNOTSUPP; 615 break; 616 } 617 lwkt_reltoken(&unp_token); 618 619 return (error); 620 } 621 622 /* 623 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 624 * for stream sockets, although the total for sender and receiver is 625 * actually only PIPSIZ. 626 * 627 * Datagram sockets really use the sendspace as the maximum datagram size, 628 * and don't really want to reserve the sendspace. Their recvspace should 629 * be large enough for at least one max-size datagram plus address. 630 * 631 * We want the local send/recv space to be significant larger then lo0's 632 * mtu of 16384. 
633 */ 634 #ifndef PIPSIZ 635 #define PIPSIZ 57344 636 #endif 637 static u_long unpst_sendspace = PIPSIZ; 638 static u_long unpst_recvspace = PIPSIZ; 639 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 640 static u_long unpdg_recvspace = 4*1024; 641 642 static int unp_rights; /* file descriptors in flight */ 643 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin); 644 645 SYSCTL_DECL(_net_local_seqpacket); 646 SYSCTL_DECL(_net_local_stream); 647 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 648 &unpst_sendspace, 0, ""); 649 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 650 &unpst_recvspace, 0, ""); 651 652 SYSCTL_DECL(_net_local_dgram); 653 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 654 &unpdg_sendspace, 0, ""); 655 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 656 &unpdg_recvspace, 0, ""); 657 658 SYSCTL_DECL(_net_local); 659 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 660 661 static int 662 unp_attach(struct socket *so, struct pru_attach_info *ai) 663 { 664 struct unpcb *unp; 665 int error; 666 667 lwkt_gettoken(&unp_token); 668 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 669 switch (so->so_type) { 670 671 case SOCK_STREAM: 672 case SOCK_SEQPACKET: 673 error = soreserve(so, unpst_sendspace, unpst_recvspace, 674 ai->sb_rlimit); 675 break; 676 677 case SOCK_DGRAM: 678 error = soreserve(so, unpdg_sendspace, unpdg_recvspace, 679 ai->sb_rlimit); 680 break; 681 682 default: 683 panic("unp_attach"); 684 } 685 if (error) 686 goto failed; 687 } 688 unp = kmalloc(sizeof(*unp), M_UNPCB, M_NOWAIT|M_ZERO); 689 if (unp == NULL) { 690 error = ENOBUFS; 691 goto failed; 692 } 693 unp->unp_gencnt = ++unp_gencnt; 694 unp_count++; 695 LIST_INIT(&unp->unp_refs); 696 unp->unp_socket = so; 697 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */ 698 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead 699 : &unp_shead, unp, unp_link); 700 so->so_pcb = (caddr_t)unp; 701 soreference(so); 702 so->so_port = sync_soport(so, NULL, NULL); 703 error = 0; 704 failed: 705 lwkt_reltoken(&unp_token); 706 return error; 707 } 708 709 static void 710 unp_detach(struct unpcb *unp) 711 { 712 struct socket *so; 713 714 lwkt_gettoken(&unp_token); 715 716 LIST_REMOVE(unp, unp_link); 717 unp->unp_gencnt = ++unp_gencnt; 718 --unp_count; 719 if (unp->unp_vnode) { 720 unp->unp_vnode->v_socket = NULL; 721 vrele(unp->unp_vnode); 722 unp->unp_vnode = NULL; 723 } 724 if (unp->unp_conn) 725 unp_disconnect(unp); 726 while (!LIST_EMPTY(&unp->unp_refs)) 727 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 728 soisdisconnected(unp->unp_socket); 729 so = unp->unp_socket; 730 soreference(so); /* for delayed sorflush */ 731 so->so_pcb = NULL; 732 unp->unp_socket = NULL; 733 sofree(so); /* remove pcb ref */ 734 735 if (unp_rights) { 736 /* 737 * Normally the receive buffer is flushed later, 738 * in sofree, but if our receive buffer holds references 739 * to descriptors that are now garbage, we will dispose 740 * of those descriptor references after the garbage collector 741 * gets them (resulting in a "panic: closef: count < 0"). 
742 */ 743 sorflush(so); 744 unp_gc(); 745 } 746 sofree(so); 747 lwkt_reltoken(&unp_token); 748 749 if (unp->unp_addr) 750 kfree(unp->unp_addr, M_SONAME); 751 kfree(unp, M_UNPCB); 752 } 753 754 static int 755 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) 756 { 757 struct proc *p = td->td_proc; 758 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 759 struct vnode *vp; 760 struct vattr vattr; 761 int error, namelen; 762 struct nlookupdata nd; 763 char buf[SOCK_MAXADDRLEN]; 764 765 lwkt_gettoken(&unp_token); 766 if (unp->unp_vnode != NULL) { 767 error = EINVAL; 768 goto failed; 769 } 770 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 771 if (namelen <= 0) { 772 error = EINVAL; 773 goto failed; 774 } 775 strncpy(buf, soun->sun_path, namelen); 776 buf[namelen] = 0; /* null-terminate the string */ 777 error = nlookup_init(&nd, buf, UIO_SYSSPACE, 778 NLC_LOCKVP | NLC_CREATE | NLC_REFDVP); 779 if (error == 0) 780 error = nlookup(&nd); 781 if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL) 782 error = EADDRINUSE; 783 if (error) 784 goto done; 785 786 VATTR_NULL(&vattr); 787 vattr.va_type = VSOCK; 788 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 789 error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr); 790 if (error == 0) { 791 vp->v_socket = unp->unp_socket; 792 unp->unp_vnode = vp; 793 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam); 794 vn_unlock(vp); 795 } 796 done: 797 nlookup_done(&nd); 798 failed: 799 lwkt_reltoken(&unp_token); 800 return (error); 801 } 802 803 static int 804 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 805 { 806 struct proc *p = td->td_proc; 807 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 808 struct vnode *vp; 809 struct socket *so2, *so3; 810 struct unpcb *unp, *unp2, *unp3; 811 int error, len; 812 struct nlookupdata nd; 813 char buf[SOCK_MAXADDRLEN]; 814 815 lwkt_gettoken(&unp_token); 816 817 len = nam->sa_len - offsetof(struct 
sockaddr_un, sun_path); 818 if (len <= 0) { 819 error = EINVAL; 820 goto failed; 821 } 822 strncpy(buf, soun->sun_path, len); 823 buf[len] = 0; 824 825 vp = NULL; 826 error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW); 827 if (error == 0) 828 error = nlookup(&nd); 829 if (error == 0) 830 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 831 nlookup_done(&nd); 832 if (error) 833 goto failed; 834 835 if (vp->v_type != VSOCK) { 836 error = ENOTSOCK; 837 goto bad; 838 } 839 error = VOP_ACCESS(vp, VWRITE, p->p_ucred); 840 if (error) 841 goto bad; 842 so2 = vp->v_socket; 843 if (so2 == NULL) { 844 error = ECONNREFUSED; 845 goto bad; 846 } 847 if (so->so_type != so2->so_type) { 848 error = EPROTOTYPE; 849 goto bad; 850 } 851 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 852 if (!(so2->so_options & SO_ACCEPTCONN) || 853 (so3 = sonewconn(so2, 0)) == NULL) { 854 error = ECONNREFUSED; 855 goto bad; 856 } 857 unp = so->so_pcb; 858 unp2 = so2->so_pcb; 859 unp3 = so3->so_pcb; 860 if (unp2->unp_addr) 861 unp3->unp_addr = (struct sockaddr_un *) 862 dup_sockaddr((struct sockaddr *)unp2->unp_addr); 863 864 /* 865 * unp_peercred management: 866 * 867 * The connecter's (client's) credentials are copied 868 * from its process structure at the time of connect() 869 * (which is now). 870 */ 871 cru2x(p->p_ucred, &unp3->unp_peercred); 872 unp3->unp_flags |= UNP_HAVEPC; 873 /* 874 * The receiver's (server's) credentials are copied 875 * from the unp_peercred member of socket on which the 876 * former called listen(); unp_listen() cached that 877 * process's credentials at that time so we can use 878 * them now. 
879 */ 880 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 881 ("unp_connect: listener without cached peercred")); 882 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 883 sizeof(unp->unp_peercred)); 884 unp->unp_flags |= UNP_HAVEPC; 885 886 so2 = so3; 887 } 888 error = unp_connect2(so, so2); 889 bad: 890 vput(vp); 891 failed: 892 lwkt_reltoken(&unp_token); 893 return (error); 894 } 895 896 int 897 unp_connect2(struct socket *so, struct socket *so2) 898 { 899 struct unpcb *unp; 900 struct unpcb *unp2; 901 902 lwkt_gettoken(&unp_token); 903 unp = so->so_pcb; 904 if (so2->so_type != so->so_type) { 905 lwkt_reltoken(&unp_token); 906 return (EPROTOTYPE); 907 } 908 unp2 = so2->so_pcb; 909 unp->unp_conn = unp2; 910 911 switch (so->so_type) { 912 case SOCK_DGRAM: 913 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 914 soisconnected(so); 915 break; 916 917 case SOCK_STREAM: 918 case SOCK_SEQPACKET: 919 unp2->unp_conn = unp; 920 soisconnected(so); 921 soisconnected(so2); 922 break; 923 924 default: 925 panic("unp_connect2"); 926 } 927 lwkt_reltoken(&unp_token); 928 return (0); 929 } 930 931 static void 932 unp_disconnect(struct unpcb *unp) 933 { 934 struct unpcb *unp2; 935 936 lwkt_gettoken(&unp_token); 937 938 unp2 = unp->unp_conn; 939 if (unp2 == NULL) { 940 lwkt_reltoken(&unp_token); 941 return; 942 } 943 944 unp->unp_conn = NULL; 945 946 switch (unp->unp_socket->so_type) { 947 case SOCK_DGRAM: 948 LIST_REMOVE(unp, unp_reflink); 949 soclrstate(unp->unp_socket, SS_ISCONNECTED); 950 break; 951 case SOCK_STREAM: 952 case SOCK_SEQPACKET: 953 soisdisconnected(unp->unp_socket); 954 unp2->unp_conn = NULL; 955 soisdisconnected(unp2->unp_socket); 956 break; 957 } 958 lwkt_reltoken(&unp_token); 959 } 960 961 #ifdef notdef 962 void 963 unp_abort(struct unpcb *unp) 964 { 965 lwkt_gettoken(&unp_token); 966 unp_detach(unp); 967 lwkt_reltoken(&unp_token); 968 } 969 #endif 970 971 static int 972 prison_unpcb(struct thread *td, struct unpcb *unp) 973 { 974 struct proc *p; 975 976 if (td 
== NULL) 977 return (0); 978 if ((p = td->td_proc) == NULL) 979 return (0); 980 if (!p->p_ucred->cr_prison) 981 return (0); 982 if (p->p_fd->fd_rdir == unp->unp_rvnode) 983 return (0); 984 return (1); 985 } 986 987 static int 988 unp_pcblist(SYSCTL_HANDLER_ARGS) 989 { 990 int error, i, n; 991 struct unpcb *unp, **unp_list; 992 unp_gen_t gencnt; 993 struct unp_head *head; 994 995 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 996 997 KKASSERT(curproc != NULL); 998 999 /* 1000 * The process of preparing the PCB list is too time-consuming and 1001 * resource-intensive to repeat twice on every request. 1002 */ 1003 if (req->oldptr == NULL) { 1004 n = unp_count; 1005 req->oldidx = (n + n/8) * sizeof(struct xunpcb); 1006 return 0; 1007 } 1008 1009 if (req->newptr != NULL) 1010 return EPERM; 1011 1012 lwkt_gettoken(&unp_token); 1013 1014 /* 1015 * OK, now we're committed to doing something. 1016 */ 1017 gencnt = unp_gencnt; 1018 n = unp_count; 1019 1020 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1021 1022 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1023 unp = LIST_NEXT(unp, unp_link)) { 1024 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp)) 1025 unp_list[i++] = unp; 1026 } 1027 n = i; /* in case we lost some during malloc */ 1028 1029 error = 0; 1030 for (i = 0; i < n; i++) { 1031 unp = unp_list[i]; 1032 if (unp->unp_gencnt <= gencnt) { 1033 struct xunpcb xu; 1034 xu.xu_len = sizeof xu; 1035 xu.xu_unpp = unp; 1036 /* 1037 * XXX - need more locking here to protect against 1038 * connect/disconnect races for SMP. 
1039 */ 1040 if (unp->unp_addr) 1041 bcopy(unp->unp_addr, &xu.xu_addr, 1042 unp->unp_addr->sun_len); 1043 if (unp->unp_conn && unp->unp_conn->unp_addr) 1044 bcopy(unp->unp_conn->unp_addr, 1045 &xu.xu_caddr, 1046 unp->unp_conn->unp_addr->sun_len); 1047 bcopy(unp, &xu.xu_unp, sizeof *unp); 1048 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1049 error = SYSCTL_OUT(req, &xu, sizeof xu); 1050 } 1051 } 1052 lwkt_reltoken(&unp_token); 1053 kfree(unp_list, M_TEMP); 1054 1055 return error; 1056 } 1057 1058 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1059 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1060 "List of active local datagram sockets"); 1061 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1062 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1063 "List of active local stream sockets"); 1064 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 1065 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", 1066 "List of active local seqpacket stream sockets"); 1067 1068 static void 1069 unp_shutdown(struct unpcb *unp) 1070 { 1071 struct socket *so; 1072 1073 if ((unp->unp_socket->so_type == SOCK_STREAM || 1074 unp->unp_socket->so_type == SOCK_SEQPACKET) && 1075 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) { 1076 socantrcvmore(so); 1077 } 1078 } 1079 1080 static void 1081 unp_drop(struct unpcb *unp, int err) 1082 { 1083 struct socket *so = unp->unp_socket; 1084 1085 so->so_error = err; 1086 unp_disconnect(unp); 1087 } 1088 1089 #ifdef notdef 1090 void 1091 unp_drain(void) 1092 { 1093 lwkt_gettoken(&unp_token); 1094 lwkt_reltoken(&unp_token); 1095 } 1096 #endif 1097 1098 int 1099 unp_externalize(struct mbuf *rights) 1100 { 1101 struct thread *td = curthread; 1102 struct proc *p = td->td_proc; /* XXX */ 1103 struct lwp *lp = td->td_lwp; 1104 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1105 int *fdp; 1106 int i; 1107 struct file **rp; 1108 struct file *fp; 1109 int newfds = (cm->cmsg_len - 
(CMSG_DATA(cm) - (u_char *)cm)) 1110 / sizeof (struct file *); 1111 int f; 1112 1113 lwkt_gettoken(&unp_token); 1114 1115 /* 1116 * if the new FD's will not fit, then we free them all 1117 */ 1118 if (!fdavail(p, newfds)) { 1119 rp = (struct file **)CMSG_DATA(cm); 1120 for (i = 0; i < newfds; i++) { 1121 fp = *rp; 1122 /* 1123 * zero the pointer before calling unp_discard, 1124 * since it may end up in unp_gc().. 1125 */ 1126 *rp++ = 0; 1127 unp_discard(fp, NULL); 1128 } 1129 lwkt_reltoken(&unp_token); 1130 return (EMSGSIZE); 1131 } 1132 1133 /* 1134 * now change each pointer to an fd in the global table to 1135 * an integer that is the index to the local fd table entry 1136 * that we set up to point to the global one we are transferring. 1137 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1138 * then do it in forward order. In that case, an integer will 1139 * always come in the same place or before its corresponding 1140 * struct file pointer. 1141 * If sizeof (struct file *) is smaller than sizeof int, then 1142 * do it in reverse order. 1143 */ 1144 if (sizeof (struct file *) >= sizeof (int)) { 1145 fdp = (int *)CMSG_DATA(cm); 1146 rp = (struct file **)CMSG_DATA(cm); 1147 for (i = 0; i < newfds; i++) { 1148 if (fdalloc(p, 0, &f)) 1149 panic("unp_externalize"); 1150 fp = *rp++; 1151 unp_fp_externalize(lp, fp, f); 1152 *fdp++ = f; 1153 } 1154 } else { 1155 fdp = (int *)CMSG_DATA(cm) + newfds - 1; 1156 rp = (struct file **)CMSG_DATA(cm) + newfds - 1; 1157 for (i = 0; i < newfds; i++) { 1158 if (fdalloc(p, 0, &f)) 1159 panic("unp_externalize"); 1160 fp = *rp--; 1161 unp_fp_externalize(lp, fp, f); 1162 *fdp-- = f; 1163 } 1164 } 1165 1166 /* 1167 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1168 * differs. 
1169 */ 1170 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); 1171 rights->m_len = cm->cmsg_len; 1172 1173 lwkt_reltoken(&unp_token); 1174 return (0); 1175 } 1176 1177 static void 1178 unp_fp_externalize(struct lwp *lp, struct file *fp, int fd) 1179 { 1180 struct file *fx; 1181 int error; 1182 1183 lwkt_gettoken(&unp_token); 1184 1185 if (lp) { 1186 KKASSERT(fd >= 0); 1187 if (fp->f_flag & FREVOKED) { 1188 kprintf("Warning: revoked fp exiting unix socket\n"); 1189 fx = NULL; 1190 error = falloc(lp, &fx, NULL); 1191 if (error == 0) 1192 fsetfd(lp->lwp_proc->p_fd, fx, fd); 1193 else 1194 fsetfd(lp->lwp_proc->p_fd, NULL, fd); 1195 fdrop(fx); 1196 } else { 1197 fsetfd(lp->lwp_proc->p_fd, fp, fd); 1198 } 1199 } 1200 spin_lock(&unp_spin); 1201 fp->f_msgcount--; 1202 unp_rights--; 1203 spin_unlock(&unp_spin); 1204 fdrop(fp); 1205 1206 lwkt_reltoken(&unp_token); 1207 } 1208 1209 1210 void 1211 unp_init(void) 1212 { 1213 LIST_INIT(&unp_dhead); 1214 LIST_INIT(&unp_shead); 1215 spin_init(&unp_spin); 1216 } 1217 1218 static int 1219 unp_internalize(struct mbuf *control, struct thread *td) 1220 { 1221 struct proc *p = td->td_proc; 1222 struct filedesc *fdescp; 1223 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1224 struct file **rp; 1225 struct file *fp; 1226 int i, fd, *fdp; 1227 struct cmsgcred *cmcred; 1228 int oldfds; 1229 u_int newlen; 1230 int error; 1231 1232 KKASSERT(p); 1233 lwkt_gettoken(&unp_token); 1234 1235 fdescp = p->p_fd; 1236 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 1237 cm->cmsg_level != SOL_SOCKET || 1238 CMSG_ALIGN(cm->cmsg_len) != control->m_len) { 1239 error = EINVAL; 1240 goto done; 1241 } 1242 1243 /* 1244 * Fill in credential information. 
1245 */ 1246 if (cm->cmsg_type == SCM_CREDS) { 1247 cmcred = (struct cmsgcred *)CMSG_DATA(cm); 1248 cmcred->cmcred_pid = p->p_pid; 1249 cmcred->cmcred_uid = p->p_ucred->cr_ruid; 1250 cmcred->cmcred_gid = p->p_ucred->cr_rgid; 1251 cmcred->cmcred_euid = p->p_ucred->cr_uid; 1252 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 1253 CMGROUP_MAX); 1254 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1255 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 1256 error = 0; 1257 goto done; 1258 } 1259 1260 /* 1261 * cmsghdr may not be aligned, do not allow calculation(s) to 1262 * go negative. 1263 */ 1264 if (cm->cmsg_len < CMSG_LEN(0)) { 1265 error = EINVAL; 1266 goto done; 1267 } 1268 1269 oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int); 1270 1271 /* 1272 * check that all the FDs passed in refer to legal OPEN files 1273 * If not, reject the entire operation. 1274 */ 1275 fdp = (int *)CMSG_DATA(cm); 1276 for (i = 0; i < oldfds; i++) { 1277 fd = *fdp++; 1278 if ((unsigned)fd >= fdescp->fd_nfiles || 1279 fdescp->fd_files[fd].fp == NULL) { 1280 error = EBADF; 1281 goto done; 1282 } 1283 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) { 1284 error = EOPNOTSUPP; 1285 goto done; 1286 } 1287 } 1288 /* 1289 * Now replace the integer FDs with pointers to 1290 * the associated global file table entry.. 1291 * Allocate a bigger buffer as necessary. But if an cluster is not 1292 * enough, return E2BIG. 
1293 */ 1294 newlen = CMSG_LEN(oldfds * sizeof(struct file *)); 1295 if (newlen > MCLBYTES) { 1296 error = E2BIG; 1297 goto done; 1298 } 1299 if (newlen - control->m_len > M_TRAILINGSPACE(control)) { 1300 if (control->m_flags & M_EXT) { 1301 error = E2BIG; 1302 goto done; 1303 } 1304 MCLGET(control, MB_WAIT); 1305 if (!(control->m_flags & M_EXT)) { 1306 error = ENOBUFS; 1307 goto done; 1308 } 1309 1310 /* copy the data to the cluster */ 1311 memcpy(mtod(control, char *), cm, cm->cmsg_len); 1312 cm = mtod(control, struct cmsghdr *); 1313 } 1314 1315 /* 1316 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1317 * differs. 1318 */ 1319 cm->cmsg_len = newlen; 1320 control->m_len = CMSG_ALIGN(newlen); 1321 1322 /* 1323 * Transform the file descriptors into struct file pointers. 1324 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1325 * then do it in reverse order so that the int won't get until 1326 * we're done. 1327 * If sizeof (struct file *) is smaller than sizeof int, then 1328 * do it in forward order. 1329 */ 1330 if (sizeof (struct file *) >= sizeof (int)) { 1331 fdp = (int *)CMSG_DATA(cm) + oldfds - 1; 1332 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; 1333 for (i = 0; i < oldfds; i++) { 1334 fp = fdescp->fd_files[*fdp--].fp; 1335 *rp-- = fp; 1336 fhold(fp); 1337 spin_lock(&unp_spin); 1338 fp->f_msgcount++; 1339 unp_rights++; 1340 spin_unlock(&unp_spin); 1341 } 1342 } else { 1343 fdp = (int *)CMSG_DATA(cm); 1344 rp = (struct file **)CMSG_DATA(cm); 1345 for (i = 0; i < oldfds; i++) { 1346 fp = fdescp->fd_files[*fdp++].fp; 1347 *rp++ = fp; 1348 fhold(fp); 1349 spin_lock(&unp_spin); 1350 fp->f_msgcount++; 1351 unp_rights++; 1352 spin_unlock(&unp_spin); 1353 } 1354 } 1355 error = 0; 1356 done: 1357 lwkt_reltoken(&unp_token); 1358 return error; 1359 } 1360 1361 /* 1362 * Garbage collect in-transit file descriptors that get lost due to 1363 * loops (i.e. 
 * when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

/*
 * Per-pass state handed through allfiles_scan_exclusive() to the
 * unp_gc_*() callbacks below.
 */
struct unp_gc_info {
	struct file **extra_ref;	/* held refs to unreachable fps */
	struct file *locked_fp;		/* fp whose rcv queue is being scanned */
	int defer;			/* number of files still FDEFER-marked */
	int index;			/* next free slot in extra_ref[] */
	int maxindex;			/* capacity of extra_ref[] */
};

/*
 * Mark-and-sweep collection of descriptors stranded in in-transit
 * SCM_RIGHTS messages.  Only one instance may run at a time; recursive
 * invocations (e.g. via unp_detach) return immediately.
 */
static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to
	 * be NOT deferred and NOT externally accessible.
	 */
	info.defer = 0;
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.  Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	do {
		/*
		 * Look for matches.
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things:
		 * first flush the queued rights, then drop our extra
		 * reference (the last close).
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
		/* A full array means there may be more; rescan. */
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	unp_gcing = FALSE;
}

/*
 * Sweep-phase callback: collect a held reference to each file that is
 * unreachable (all refs come from in-transit messages and the mark
 * phase never reached it).
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	/* Hit list full -- abort this scan; the caller loops and rescans. */
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove.
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * Reset the mark bits on every file before a collection pass.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}

/*
 * Mark-phase callback: mark externally-accessible files and propagate
 * reachability into the SCM_RIGHTS messages queued on unix domain
 * sockets (via unp_scan/unp_mark).
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct
socket *so;

	/*
	 * If the file is not open, skip it.
	 */
	if (fp->f_count == 0)
		return(0);
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it.
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
		--info->defer;
	} else {
		/*
		 * If it's not deferred, then check if it's
		 * already marked.. if so skip it.
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it.  It's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * Either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS))
		return(0);
#ifdef notdef
	if (so->so_rcv.ssb_flags & SSB_LOCK) {
		/*
		 * This is problematical; it's not clear
		 * we need to wait for the sockbuf to be
		 * unlocked (on a uniprocessor, at least),
		 * and it's also not clear what to do
		 * if sbwait returns an error due to receipt
		 * of a signal.  If sbwait does return
		 * an error, we'll go into an infinite
		 * loop.  Delete all of this for now.
		 */
		sbwait(&so->so_rcv);
		goto restart;
	}
#endif
	/*
	 * So, Ok, it's one of our sockets and it IS externally
	 * accessible (or was deferred).  Now we look
	 * to see if we hold any file descriptors in its
	 * message buffers.  Follow those links and mark them
	 * as accessible too.
	 */
	info->locked_fp = fp;
	/* spin_lock_wr(&so->so_rcv.sb_spin); */
	unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
	/* spin_unlock_wr(&so->so_rcv.sb_spin);*/
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

#define REVOKE_GC_MAXFILES	32

/*
 * Per-pass state for the revoke scan below.
 */
struct unp_revoke_gc_info {
	struct file *fx;			/* replacement dummy fp */
	struct file *fary[REVOKE_GC_MAXFILES];	/* revoked fps collected */
	int fcount;				/* entries used in fary[] */
};

/*
 * Replace every queued reference to a revoked descriptor with fx and
 * discard the collected revoked fps.  Loops until a scan pass no
 * longer fills the collection array.
 */
void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * WARNING: This routine is not allowed to block.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			/* Message payload is struct file pointers here. */
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Discard the rights carried in the mbuf chain m.
 */
void
unp_dispose(struct mbuf *m)
{
	lwkt_gettoken(&unp_token);
	if (m)
		unp_scan(m, unp_discard, NULL);
	lwkt_reltoken(&unp_token);
}

/*
 * Cache the listening process's credentials on the pcb
 * (sets UNP_HAVEPCCACHED).
 */
static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	lwkt_reltoken(&unp_token);
	return (0);
}

/*
 * Apply op to every in-transit file pointer carried in SCM_RIGHTS
 * control messages found on the mbuf packet chain m0.
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
				    sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * unp_gc mark callback: mark fp reachable.  Newly marked files are
 * also flagged FDEFER so a later pass scans their own queues.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	}
}

/*
 * Discard an in-transit file pointer: release the message accounting
 * and perform the final close.
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	closef(fp, NULL);
}