1 /* 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 
32 * 33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 34 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $ 35 * $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.44 2008/09/06 05:44:58 dillon Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/domain.h> 42 #include <sys/fcntl.h> 43 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 44 #include <sys/proc.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/mbuf.h> 48 #include <sys/nlookup.h> 49 #include <sys/protosw.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/resourcevar.h> 53 #include <sys/stat.h> 54 #include <sys/mount.h> 55 #include <sys/sysctl.h> 56 #include <sys/un.h> 57 #include <sys/unpcb.h> 58 #include <sys/vnode.h> 59 60 #include <sys/file2.h> 61 #include <sys/spinlock2.h> 62 #include <sys/socketvar2.h> 63 #include <sys/msgport2.h> 64 65 static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct"); 66 static unp_gen_t unp_gencnt; 67 static u_int unp_count; 68 69 static struct unp_head unp_shead, unp_dhead; 70 71 static struct lwkt_token unp_token = LWKT_TOKEN_MP_INITIALIZER(unp_token); 72 73 /* 74 * Unix communications domain. 
75 * 76 * TODO: 77 * RDM 78 * rethink name space problems 79 * need a proper out-of-band 80 * lock pushdown 81 */ 82 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 83 static ino_t unp_ino = 1; /* prototype for fake inode numbers */ 84 static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin); 85 86 static int unp_attach (struct socket *, struct pru_attach_info *); 87 static void unp_detach (struct unpcb *); 88 static int unp_bind (struct unpcb *,struct sockaddr *, struct thread *); 89 static int unp_connect (struct socket *,struct sockaddr *, 90 struct thread *); 91 static void unp_disconnect (struct unpcb *); 92 static void unp_shutdown (struct unpcb *); 93 static void unp_drop (struct unpcb *, int); 94 static void unp_gc (void); 95 static int unp_gc_clearmarks(struct file *, void *); 96 static int unp_gc_checkmarks(struct file *, void *); 97 static int unp_gc_checkrefs(struct file *, void *); 98 static int unp_revoke_gc_check(struct file *, void *); 99 static void unp_scan (struct mbuf *, void (*)(struct file *, void *), 100 void *data); 101 static void unp_mark (struct file *, void *data); 102 static void unp_discard (struct file *, void *); 103 static int unp_internalize (struct mbuf *, struct thread *); 104 static int unp_listen (struct unpcb *, struct thread *); 105 static void unp_fp_externalize(struct lwp *lp, struct file *fp, int fd); 106 107 /* 108 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort() 109 * will sofree() it when we return. 
110 */ 111 static void 112 uipc_abort(netmsg_t msg) 113 { 114 struct unpcb *unp; 115 int error; 116 117 lwkt_gettoken(&unp_token); 118 unp = msg->base.nm_so->so_pcb; 119 if (unp) { 120 unp_drop(unp, ECONNABORTED); 121 unp_detach(unp); 122 error = 0; 123 } else { 124 error = EINVAL; 125 } 126 lwkt_reltoken(&unp_token); 127 128 lwkt_replymsg(&msg->lmsg, error); 129 } 130 131 static void 132 uipc_accept(netmsg_t msg) 133 { 134 struct unpcb *unp; 135 int error; 136 137 lwkt_gettoken(&unp_token); 138 unp = msg->base.nm_so->so_pcb; 139 if (unp == NULL) { 140 error = EINVAL; 141 } else { 142 /* 143 * Pass back name of connected socket, 144 * if it was bound and we are still connected 145 * (our peer may have closed already!). 146 */ 147 if (unp->unp_conn && unp->unp_conn->unp_addr) { 148 *msg->accept.nm_nam = dup_sockaddr( 149 (struct sockaddr *)unp->unp_conn->unp_addr); 150 } else { 151 *msg->accept.nm_nam = dup_sockaddr( 152 (struct sockaddr *)&sun_noname); 153 } 154 error = 0; 155 } 156 lwkt_reltoken(&unp_token); 157 lwkt_replymsg(&msg->lmsg, error); 158 } 159 160 static void 161 uipc_attach(netmsg_t msg) 162 { 163 struct unpcb *unp; 164 int error; 165 166 lwkt_gettoken(&unp_token); 167 unp = msg->base.nm_so->so_pcb; 168 if (unp) 169 error = EISCONN; 170 else 171 error = unp_attach(msg->base.nm_so, msg->attach.nm_ai); 172 lwkt_reltoken(&unp_token); 173 lwkt_replymsg(&msg->lmsg, error); 174 } 175 176 static void 177 uipc_bind(netmsg_t msg) 178 { 179 struct unpcb *unp; 180 int error; 181 182 lwkt_gettoken(&unp_token); 183 unp = msg->base.nm_so->so_pcb; 184 if (unp) 185 error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td); 186 else 187 error = EINVAL; 188 lwkt_reltoken(&unp_token); 189 lwkt_replymsg(&msg->lmsg, error); 190 } 191 192 static void 193 uipc_connect(netmsg_t msg) 194 { 195 struct unpcb *unp; 196 int error; 197 198 lwkt_gettoken(&unp_token); 199 unp = msg->base.nm_so->so_pcb; 200 if (unp) { 201 error = unp_connect(msg->base.nm_so, 202 msg->connect.nm_nam, 
203 msg->connect.nm_td); 204 } else { 205 error = EINVAL; 206 } 207 lwkt_reltoken(&unp_token); 208 lwkt_replymsg(&msg->lmsg, error); 209 } 210 211 static void 212 uipc_connect2(netmsg_t msg) 213 { 214 struct unpcb *unp; 215 int error; 216 217 lwkt_gettoken(&unp_token); 218 unp = msg->connect2.nm_so1->so_pcb; 219 if (unp) { 220 error = unp_connect2(msg->connect2.nm_so1, 221 msg->connect2.nm_so2); 222 } else { 223 error = EINVAL; 224 } 225 lwkt_reltoken(&unp_token); 226 lwkt_replymsg(&msg->lmsg, error); 227 } 228 229 /* control is EOPNOTSUPP */ 230 231 static void 232 uipc_detach(netmsg_t msg) 233 { 234 struct unpcb *unp; 235 int error; 236 237 lwkt_gettoken(&unp_token); 238 unp = msg->base.nm_so->so_pcb; 239 if (unp) { 240 unp_detach(unp); 241 error = 0; 242 } else { 243 error = EINVAL; 244 } 245 lwkt_reltoken(&unp_token); 246 lwkt_replymsg(&msg->lmsg, error); 247 } 248 249 static void 250 uipc_disconnect(netmsg_t msg) 251 { 252 struct unpcb *unp; 253 int error; 254 255 lwkt_gettoken(&unp_token); 256 unp = msg->base.nm_so->so_pcb; 257 if (unp) { 258 unp_disconnect(unp); 259 error = 0; 260 } else { 261 error = EINVAL; 262 } 263 lwkt_reltoken(&unp_token); 264 lwkt_replymsg(&msg->lmsg, error); 265 } 266 267 static void 268 uipc_listen(netmsg_t msg) 269 { 270 struct unpcb *unp; 271 int error; 272 273 lwkt_gettoken(&unp_token); 274 unp = msg->base.nm_so->so_pcb; 275 if (unp == NULL || unp->unp_vnode == NULL) 276 error = EINVAL; 277 else 278 error = unp_listen(unp, msg->listen.nm_td); 279 lwkt_reltoken(&unp_token); 280 lwkt_replymsg(&msg->lmsg, error); 281 } 282 283 static void 284 uipc_peeraddr(netmsg_t msg) 285 { 286 struct unpcb *unp; 287 int error; 288 289 lwkt_gettoken(&unp_token); 290 unp = msg->base.nm_so->so_pcb; 291 if (unp == NULL) { 292 error = EINVAL; 293 } else if (unp->unp_conn && unp->unp_conn->unp_addr) { 294 *msg->peeraddr.nm_nam = dup_sockaddr( 295 (struct sockaddr *)unp->unp_conn->unp_addr); 296 error = 0; 297 } else { 298 /* 299 * XXX: It seems that 
this test always fails even when 300 * connection is established. So, this else clause is 301 * added as workaround to return PF_LOCAL sockaddr. 302 */ 303 *msg->peeraddr.nm_nam = dup_sockaddr( 304 (struct sockaddr *)&sun_noname); 305 error = 0; 306 } 307 lwkt_reltoken(&unp_token); 308 lwkt_replymsg(&msg->lmsg, error); 309 } 310 311 static void 312 uipc_rcvd(netmsg_t msg) 313 { 314 struct unpcb *unp; 315 struct socket *so; 316 struct socket *so2; 317 int error; 318 319 lwkt_gettoken(&unp_token); 320 so = msg->base.nm_so; 321 unp = so->so_pcb; 322 if (unp == NULL) { 323 error = EINVAL; 324 goto done; 325 } 326 327 switch (so->so_type) { 328 case SOCK_DGRAM: 329 panic("uipc_rcvd DGRAM?"); 330 /*NOTREACHED*/ 331 case SOCK_STREAM: 332 case SOCK_SEQPACKET: 333 if (unp->unp_conn == NULL) 334 break; 335 /* 336 * Because we are transfering mbufs directly to the 337 * peer socket we have to use SSB_STOP on the sender 338 * to prevent it from building up infinite mbufs. 339 */ 340 so2 = unp->unp_conn->unp_socket; 341 if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat && 342 so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax 343 ) { 344 atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP); 345 sowwakeup(so2); 346 } 347 break; 348 default: 349 panic("uipc_rcvd unknown socktype"); 350 /*NOTREACHED*/ 351 } 352 error = 0; 353 done: 354 lwkt_reltoken(&unp_token); 355 lwkt_replymsg(&msg->lmsg, error); 356 } 357 358 /* pru_rcvoob is EOPNOTSUPP */ 359 360 static void 361 uipc_send(netmsg_t msg) 362 { 363 struct unpcb *unp; 364 struct socket *so; 365 struct socket *so2; 366 struct mbuf *control; 367 struct mbuf *m; 368 int error = 0; 369 370 lwkt_gettoken(&unp_token); 371 so = msg->base.nm_so; 372 control = msg->send.nm_control; 373 m = msg->send.nm_m; 374 unp = so->so_pcb; 375 376 if (unp == NULL) { 377 error = EINVAL; 378 goto release; 379 } 380 if (msg->send.nm_flags & PRUS_OOB) { 381 error = EOPNOTSUPP; 382 goto release; 383 } 384 385 if (control && (error = unp_internalize(control, 
msg->send.nm_td))) 386 goto release; 387 388 switch (so->so_type) { 389 case SOCK_DGRAM: 390 { 391 struct sockaddr *from; 392 393 if (msg->send.nm_addr) { 394 if (unp->unp_conn) { 395 error = EISCONN; 396 break; 397 } 398 error = unp_connect(so, 399 msg->send.nm_addr, 400 msg->send.nm_td); 401 if (error) 402 break; 403 } else { 404 if (unp->unp_conn == NULL) { 405 error = ENOTCONN; 406 break; 407 } 408 } 409 so2 = unp->unp_conn->unp_socket; 410 if (unp->unp_addr) 411 from = (struct sockaddr *)unp->unp_addr; 412 else 413 from = &sun_noname; 414 415 lwkt_gettoken(&so2->so_rcv.ssb_token); 416 if (ssb_appendaddr(&so2->so_rcv, from, m, control)) { 417 sorwakeup(so2); 418 m = NULL; 419 control = NULL; 420 } else { 421 error = ENOBUFS; 422 } 423 if (msg->send.nm_addr) 424 unp_disconnect(unp); 425 lwkt_reltoken(&so2->so_rcv.ssb_token); 426 break; 427 } 428 429 case SOCK_STREAM: 430 case SOCK_SEQPACKET: 431 /* Connect if not connected yet. */ 432 /* 433 * Note: A better implementation would complain 434 * if not equal to the peer's address. 435 */ 436 if (!(so->so_state & SS_ISCONNECTED)) { 437 if (msg->send.nm_addr) { 438 error = unp_connect(so, 439 msg->send.nm_addr, 440 msg->send.nm_td); 441 if (error) 442 break; /* XXX */ 443 } else { 444 error = ENOTCONN; 445 break; 446 } 447 } 448 449 if (so->so_state & SS_CANTSENDMORE) { 450 error = EPIPE; 451 break; 452 } 453 if (unp->unp_conn == NULL) 454 panic("uipc_send connected but no connection?"); 455 so2 = unp->unp_conn->unp_socket; 456 /* 457 * Send to paired receive port, and then reduce 458 * send buffer hiwater marks to maintain backpressure. 459 * Wake up readers. 
460 */ 461 lwkt_gettoken(&so2->so_rcv.ssb_token); 462 if (control) { 463 if (ssb_appendcontrol(&so2->so_rcv, m, control)) { 464 control = NULL; 465 m = NULL; 466 } 467 } else if (so->so_type == SOCK_SEQPACKET) { 468 sbappendrecord(&so2->so_rcv.sb, m); 469 m = NULL; 470 } else { 471 sbappend(&so2->so_rcv.sb, m); 472 m = NULL; 473 } 474 475 /* 476 * Because we are transfering mbufs directly to the 477 * peer socket we have to use SSB_STOP on the sender 478 * to prevent it from building up infinite mbufs. 479 */ 480 if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat || 481 so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax 482 ) { 483 atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP); 484 } 485 lwkt_reltoken(&so2->so_rcv.ssb_token); 486 sorwakeup(so2); 487 break; 488 489 default: 490 panic("uipc_send unknown socktype"); 491 } 492 493 /* 494 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN. 495 */ 496 if (msg->send.nm_flags & PRUS_EOF) { 497 socantsendmore(so); 498 unp_shutdown(unp); 499 } 500 501 if (control && error != 0) 502 unp_dispose(control); 503 504 release: 505 lwkt_reltoken(&unp_token); 506 507 if (control) 508 m_freem(control); 509 if (m) 510 m_freem(m); 511 lwkt_replymsg(&msg->lmsg, error); 512 } 513 514 /* 515 * MPSAFE 516 */ 517 static void 518 uipc_sense(netmsg_t msg) 519 { 520 struct unpcb *unp; 521 struct socket *so; 522 struct stat *sb; 523 int error; 524 525 lwkt_gettoken(&unp_token); 526 so = msg->base.nm_so; 527 sb = msg->sense.nm_stat; 528 unp = so->so_pcb; 529 if (unp == NULL) { 530 error = EINVAL; 531 goto done; 532 } 533 sb->st_blksize = so->so_snd.ssb_hiwat; 534 sb->st_dev = NOUDEV; 535 if (unp->unp_ino == 0) { /* make up a non-zero inode number */ 536 spin_lock(&unp_ino_spin); 537 unp->unp_ino = unp_ino++; 538 spin_unlock(&unp_ino_spin); 539 } 540 sb->st_ino = unp->unp_ino; 541 error = 0; 542 done: 543 lwkt_reltoken(&unp_token); 544 lwkt_replymsg(&msg->lmsg, error); 545 } 546 547 static void 548 uipc_shutdown(netmsg_t msg) 549 { 550 struct 
socket *so; 551 struct unpcb *unp; 552 int error; 553 554 lwkt_gettoken(&unp_token); 555 so = msg->base.nm_so; 556 unp = so->so_pcb; 557 if (unp) { 558 socantsendmore(so); 559 unp_shutdown(unp); 560 error = 0; 561 } else { 562 error = EINVAL; 563 } 564 lwkt_reltoken(&unp_token); 565 lwkt_replymsg(&msg->lmsg, error); 566 } 567 568 static void 569 uipc_sockaddr(netmsg_t msg) 570 { 571 struct unpcb *unp; 572 int error; 573 574 lwkt_gettoken(&unp_token); 575 unp = msg->base.nm_so->so_pcb; 576 if (unp) { 577 if (unp->unp_addr) { 578 *msg->sockaddr.nm_nam = 579 dup_sockaddr((struct sockaddr *)unp->unp_addr); 580 } 581 error = 0; 582 } else { 583 error = EINVAL; 584 } 585 lwkt_reltoken(&unp_token); 586 lwkt_replymsg(&msg->lmsg, error); 587 } 588 589 struct pr_usrreqs uipc_usrreqs = { 590 .pru_abort = uipc_abort, 591 .pru_accept = uipc_accept, 592 .pru_attach = uipc_attach, 593 .pru_bind = uipc_bind, 594 .pru_connect = uipc_connect, 595 .pru_connect2 = uipc_connect2, 596 .pru_control = pr_generic_notsupp, 597 .pru_detach = uipc_detach, 598 .pru_disconnect = uipc_disconnect, 599 .pru_listen = uipc_listen, 600 .pru_peeraddr = uipc_peeraddr, 601 .pru_rcvd = uipc_rcvd, 602 .pru_rcvoob = pr_generic_notsupp, 603 .pru_send = uipc_send, 604 .pru_sense = uipc_sense, 605 .pru_shutdown = uipc_shutdown, 606 .pru_sockaddr = uipc_sockaddr, 607 .pru_sosend = sosend, 608 .pru_soreceive = soreceive 609 }; 610 611 void 612 uipc_ctloutput(netmsg_t msg) 613 { 614 struct socket *so; 615 struct sockopt *sopt; 616 struct unpcb *unp; 617 int error = 0; 618 619 lwkt_gettoken(&unp_token); 620 so = msg->base.nm_so; 621 sopt = msg->ctloutput.nm_sopt; 622 unp = so->so_pcb; 623 624 switch (sopt->sopt_dir) { 625 case SOPT_GET: 626 switch (sopt->sopt_name) { 627 case LOCAL_PEERCRED: 628 if (unp->unp_flags & UNP_HAVEPC) 629 soopt_from_kbuf(sopt, &unp->unp_peercred, 630 sizeof(unp->unp_peercred)); 631 else { 632 if (so->so_type == SOCK_STREAM) 633 error = ENOTCONN; 634 else if (so->so_type == 
SOCK_SEQPACKET) 635 error = ENOTCONN; 636 else 637 error = EINVAL; 638 } 639 break; 640 default: 641 error = EOPNOTSUPP; 642 break; 643 } 644 break; 645 case SOPT_SET: 646 default: 647 error = EOPNOTSUPP; 648 break; 649 } 650 lwkt_reltoken(&unp_token); 651 lwkt_replymsg(&msg->lmsg, error); 652 } 653 654 /* 655 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 656 * for stream sockets, although the total for sender and receiver is 657 * actually only PIPSIZ. 658 * 659 * Datagram sockets really use the sendspace as the maximum datagram size, 660 * and don't really want to reserve the sendspace. Their recvspace should 661 * be large enough for at least one max-size datagram plus address. 662 * 663 * We want the local send/recv space to be significant larger then lo0's 664 * mtu of 16384. 665 */ 666 #ifndef PIPSIZ 667 #define PIPSIZ 57344 668 #endif 669 static u_long unpst_sendspace = PIPSIZ; 670 static u_long unpst_recvspace = PIPSIZ; 671 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 672 static u_long unpdg_recvspace = 4*1024; 673 674 static int unp_rights; /* file descriptors in flight */ 675 static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin); 676 677 SYSCTL_DECL(_net_local_seqpacket); 678 SYSCTL_DECL(_net_local_stream); 679 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 680 &unpst_sendspace, 0, ""); 681 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 682 &unpst_recvspace, 0, ""); 683 684 SYSCTL_DECL(_net_local_dgram); 685 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 686 &unpdg_sendspace, 0, ""); 687 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 688 &unpdg_recvspace, 0, ""); 689 690 SYSCTL_DECL(_net_local); 691 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 692 693 static int 694 unp_attach(struct socket *so, struct pru_attach_info *ai) 695 { 696 struct unpcb *unp; 697 int error; 698 699 lwkt_gettoken(&unp_token); 
700 701 if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) { 702 switch (so->so_type) { 703 704 case SOCK_STREAM: 705 case SOCK_SEQPACKET: 706 error = soreserve(so, unpst_sendspace, unpst_recvspace, 707 ai->sb_rlimit); 708 break; 709 710 case SOCK_DGRAM: 711 error = soreserve(so, unpdg_sendspace, unpdg_recvspace, 712 ai->sb_rlimit); 713 break; 714 715 default: 716 panic("unp_attach"); 717 } 718 if (error) 719 goto failed; 720 } 721 unp = kmalloc(sizeof(*unp), M_UNPCB, M_NOWAIT|M_ZERO); 722 if (unp == NULL) { 723 error = ENOBUFS; 724 goto failed; 725 } 726 unp->unp_gencnt = ++unp_gencnt; 727 unp_count++; 728 LIST_INIT(&unp->unp_refs); 729 unp->unp_socket = so; 730 unp->unp_rvnode = ai->fd_rdir; /* jail cruft XXX JH */ 731 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 732 : &unp_shead, unp, unp_link); 733 so->so_pcb = (caddr_t)unp; 734 soreference(so); 735 error = 0; 736 failed: 737 lwkt_reltoken(&unp_token); 738 return error; 739 } 740 741 static void 742 unp_detach(struct unpcb *unp) 743 { 744 struct socket *so; 745 746 lwkt_gettoken(&unp_token); 747 748 LIST_REMOVE(unp, unp_link); 749 unp->unp_gencnt = ++unp_gencnt; 750 --unp_count; 751 if (unp->unp_vnode) { 752 unp->unp_vnode->v_socket = NULL; 753 vrele(unp->unp_vnode); 754 unp->unp_vnode = NULL; 755 } 756 if (unp->unp_conn) 757 unp_disconnect(unp); 758 while (!LIST_EMPTY(&unp->unp_refs)) 759 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 760 soisdisconnected(unp->unp_socket); 761 so = unp->unp_socket; 762 soreference(so); /* for delayed sorflush */ 763 so->so_pcb = NULL; 764 unp->unp_socket = NULL; 765 sofree(so); /* remove pcb ref */ 766 767 if (unp_rights) { 768 /* 769 * Normally the receive buffer is flushed later, 770 * in sofree, but if our receive buffer holds references 771 * to descriptors that are now garbage, we will dispose 772 * of those descriptor references after the garbage collector 773 * gets them (resulting in a "panic: closef: count < 0"). 
774 */ 775 sorflush(so); 776 unp_gc(); 777 } 778 sofree(so); 779 lwkt_reltoken(&unp_token); 780 781 if (unp->unp_addr) 782 kfree(unp->unp_addr, M_SONAME); 783 kfree(unp, M_UNPCB); 784 } 785 786 static int 787 unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td) 788 { 789 struct proc *p = td->td_proc; 790 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 791 struct vnode *vp; 792 struct vattr vattr; 793 int error, namelen; 794 struct nlookupdata nd; 795 char buf[SOCK_MAXADDRLEN]; 796 797 lwkt_gettoken(&unp_token); 798 if (unp->unp_vnode != NULL) { 799 error = EINVAL; 800 goto failed; 801 } 802 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 803 if (namelen <= 0) { 804 error = EINVAL; 805 goto failed; 806 } 807 strncpy(buf, soun->sun_path, namelen); 808 buf[namelen] = 0; /* null-terminate the string */ 809 error = nlookup_init(&nd, buf, UIO_SYSSPACE, 810 NLC_LOCKVP | NLC_CREATE | NLC_REFDVP); 811 if (error == 0) 812 error = nlookup(&nd); 813 if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL) 814 error = EADDRINUSE; 815 if (error) 816 goto done; 817 818 VATTR_NULL(&vattr); 819 vattr.va_type = VSOCK; 820 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 821 error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr); 822 if (error == 0) { 823 vp->v_socket = unp->unp_socket; 824 unp->unp_vnode = vp; 825 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam); 826 vn_unlock(vp); 827 } 828 done: 829 nlookup_done(&nd); 830 failed: 831 lwkt_reltoken(&unp_token); 832 return (error); 833 } 834 835 static int 836 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 837 { 838 struct proc *p = td->td_proc; 839 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 840 struct vnode *vp; 841 struct socket *so2, *so3; 842 struct unpcb *unp, *unp2, *unp3; 843 int error, len; 844 struct nlookupdata nd; 845 char buf[SOCK_MAXADDRLEN]; 846 847 lwkt_gettoken(&unp_token); 848 849 len = nam->sa_len - offsetof(struct 
sockaddr_un, sun_path); 850 if (len <= 0) { 851 error = EINVAL; 852 goto failed; 853 } 854 strncpy(buf, soun->sun_path, len); 855 buf[len] = 0; 856 857 vp = NULL; 858 error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW); 859 if (error == 0) 860 error = nlookup(&nd); 861 if (error == 0) 862 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 863 nlookup_done(&nd); 864 if (error) 865 goto failed; 866 867 if (vp->v_type != VSOCK) { 868 error = ENOTSOCK; 869 goto bad; 870 } 871 error = VOP_ACCESS(vp, VWRITE, p->p_ucred); 872 if (error) 873 goto bad; 874 so2 = vp->v_socket; 875 if (so2 == NULL) { 876 error = ECONNREFUSED; 877 goto bad; 878 } 879 if (so->so_type != so2->so_type) { 880 error = EPROTOTYPE; 881 goto bad; 882 } 883 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 884 if (!(so2->so_options & SO_ACCEPTCONN) || 885 (so3 = sonewconn(so2, 0)) == NULL) { 886 error = ECONNREFUSED; 887 goto bad; 888 } 889 unp = so->so_pcb; 890 unp2 = so2->so_pcb; 891 unp3 = so3->so_pcb; 892 if (unp2->unp_addr) 893 unp3->unp_addr = (struct sockaddr_un *) 894 dup_sockaddr((struct sockaddr *)unp2->unp_addr); 895 896 /* 897 * unp_peercred management: 898 * 899 * The connecter's (client's) credentials are copied 900 * from its process structure at the time of connect() 901 * (which is now). 902 */ 903 cru2x(p->p_ucred, &unp3->unp_peercred); 904 unp3->unp_flags |= UNP_HAVEPC; 905 /* 906 * The receiver's (server's) credentials are copied 907 * from the unp_peercred member of socket on which the 908 * former called listen(); unp_listen() cached that 909 * process's credentials at that time so we can use 910 * them now. 
911 */ 912 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 913 ("unp_connect: listener without cached peercred")); 914 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 915 sizeof(unp->unp_peercred)); 916 unp->unp_flags |= UNP_HAVEPC; 917 918 so2 = so3; 919 } 920 error = unp_connect2(so, so2); 921 bad: 922 vput(vp); 923 failed: 924 lwkt_reltoken(&unp_token); 925 return (error); 926 } 927 928 int 929 unp_connect2(struct socket *so, struct socket *so2) 930 { 931 struct unpcb *unp; 932 struct unpcb *unp2; 933 934 lwkt_gettoken(&unp_token); 935 unp = so->so_pcb; 936 if (so2->so_type != so->so_type) { 937 lwkt_reltoken(&unp_token); 938 return (EPROTOTYPE); 939 } 940 unp2 = so2->so_pcb; 941 unp->unp_conn = unp2; 942 943 switch (so->so_type) { 944 case SOCK_DGRAM: 945 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 946 soisconnected(so); 947 break; 948 949 case SOCK_STREAM: 950 case SOCK_SEQPACKET: 951 unp2->unp_conn = unp; 952 soisconnected(so); 953 soisconnected(so2); 954 break; 955 956 default: 957 panic("unp_connect2"); 958 } 959 lwkt_reltoken(&unp_token); 960 return (0); 961 } 962 963 static void 964 unp_disconnect(struct unpcb *unp) 965 { 966 struct unpcb *unp2; 967 968 lwkt_gettoken(&unp_token); 969 970 unp2 = unp->unp_conn; 971 if (unp2 == NULL) { 972 lwkt_reltoken(&unp_token); 973 return; 974 } 975 976 unp->unp_conn = NULL; 977 978 switch (unp->unp_socket->so_type) { 979 case SOCK_DGRAM: 980 LIST_REMOVE(unp, unp_reflink); 981 soclrstate(unp->unp_socket, SS_ISCONNECTED); 982 break; 983 case SOCK_STREAM: 984 case SOCK_SEQPACKET: 985 soisdisconnected(unp->unp_socket); 986 unp2->unp_conn = NULL; 987 soisdisconnected(unp2->unp_socket); 988 break; 989 } 990 lwkt_reltoken(&unp_token); 991 } 992 993 #ifdef notdef 994 void 995 unp_abort(struct unpcb *unp) 996 { 997 lwkt_gettoken(&unp_token); 998 unp_detach(unp); 999 lwkt_reltoken(&unp_token); 1000 } 1001 #endif 1002 1003 static int 1004 prison_unpcb(struct thread *td, struct unpcb *unp) 1005 { 1006 struct proc *p; 1007 
1008 if (td == NULL) 1009 return (0); 1010 if ((p = td->td_proc) == NULL) 1011 return (0); 1012 if (!p->p_ucred->cr_prison) 1013 return (0); 1014 if (p->p_fd->fd_rdir == unp->unp_rvnode) 1015 return (0); 1016 return (1); 1017 } 1018 1019 static int 1020 unp_pcblist(SYSCTL_HANDLER_ARGS) 1021 { 1022 int error, i, n; 1023 struct unpcb *unp, **unp_list; 1024 unp_gen_t gencnt; 1025 struct unp_head *head; 1026 1027 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 1028 1029 KKASSERT(curproc != NULL); 1030 1031 /* 1032 * The process of preparing the PCB list is too time-consuming and 1033 * resource-intensive to repeat twice on every request. 1034 */ 1035 if (req->oldptr == NULL) { 1036 n = unp_count; 1037 req->oldidx = (n + n/8) * sizeof(struct xunpcb); 1038 return 0; 1039 } 1040 1041 if (req->newptr != NULL) 1042 return EPERM; 1043 1044 lwkt_gettoken(&unp_token); 1045 1046 /* 1047 * OK, now we're committed to doing something. 1048 */ 1049 gencnt = unp_gencnt; 1050 n = unp_count; 1051 1052 unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 1053 1054 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 1055 unp = LIST_NEXT(unp, unp_link)) { 1056 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp)) 1057 unp_list[i++] = unp; 1058 } 1059 n = i; /* in case we lost some during malloc */ 1060 1061 error = 0; 1062 for (i = 0; i < n; i++) { 1063 unp = unp_list[i]; 1064 if (unp->unp_gencnt <= gencnt) { 1065 struct xunpcb xu; 1066 xu.xu_len = sizeof xu; 1067 xu.xu_unpp = unp; 1068 /* 1069 * XXX - need more locking here to protect against 1070 * connect/disconnect races for SMP. 
1071 */ 1072 if (unp->unp_addr) 1073 bcopy(unp->unp_addr, &xu.xu_addr, 1074 unp->unp_addr->sun_len); 1075 if (unp->unp_conn && unp->unp_conn->unp_addr) 1076 bcopy(unp->unp_conn->unp_addr, 1077 &xu.xu_caddr, 1078 unp->unp_conn->unp_addr->sun_len); 1079 bcopy(unp, &xu.xu_unp, sizeof *unp); 1080 sotoxsocket(unp->unp_socket, &xu.xu_socket); 1081 error = SYSCTL_OUT(req, &xu, sizeof xu); 1082 } 1083 } 1084 lwkt_reltoken(&unp_token); 1085 kfree(unp_list, M_TEMP); 1086 1087 return error; 1088 } 1089 1090 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1091 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1092 "List of active local datagram sockets"); 1093 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1094 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1095 "List of active local stream sockets"); 1096 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD, 1097 (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb", 1098 "List of active local seqpacket stream sockets"); 1099 1100 static void 1101 unp_shutdown(struct unpcb *unp) 1102 { 1103 struct socket *so; 1104 1105 if ((unp->unp_socket->so_type == SOCK_STREAM || 1106 unp->unp_socket->so_type == SOCK_SEQPACKET) && 1107 unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) { 1108 socantrcvmore(so); 1109 } 1110 } 1111 1112 static void 1113 unp_drop(struct unpcb *unp, int err) 1114 { 1115 struct socket *so = unp->unp_socket; 1116 1117 so->so_error = err; 1118 unp_disconnect(unp); 1119 } 1120 1121 #ifdef notdef 1122 void 1123 unp_drain(void) 1124 { 1125 lwkt_gettoken(&unp_token); 1126 lwkt_reltoken(&unp_token); 1127 } 1128 #endif 1129 1130 int 1131 unp_externalize(struct mbuf *rights) 1132 { 1133 struct thread *td = curthread; 1134 struct proc *p = td->td_proc; /* XXX */ 1135 struct lwp *lp = td->td_lwp; 1136 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 1137 int *fdp; 1138 int i; 1139 struct file **rp; 1140 struct file *fp; 1141 int newfds = (cm->cmsg_len - 
(CMSG_DATA(cm) - (u_char *)cm)) 1142 / sizeof (struct file *); 1143 int f; 1144 1145 lwkt_gettoken(&unp_token); 1146 1147 /* 1148 * if the new FD's will not fit, then we free them all 1149 */ 1150 if (!fdavail(p, newfds)) { 1151 rp = (struct file **)CMSG_DATA(cm); 1152 for (i = 0; i < newfds; i++) { 1153 fp = *rp; 1154 /* 1155 * zero the pointer before calling unp_discard, 1156 * since it may end up in unp_gc().. 1157 */ 1158 *rp++ = 0; 1159 unp_discard(fp, NULL); 1160 } 1161 lwkt_reltoken(&unp_token); 1162 return (EMSGSIZE); 1163 } 1164 1165 /* 1166 * now change each pointer to an fd in the global table to 1167 * an integer that is the index to the local fd table entry 1168 * that we set up to point to the global one we are transferring. 1169 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1170 * then do it in forward order. In that case, an integer will 1171 * always come in the same place or before its corresponding 1172 * struct file pointer. 1173 * If sizeof (struct file *) is smaller than sizeof int, then 1174 * do it in reverse order. 1175 */ 1176 if (sizeof (struct file *) >= sizeof (int)) { 1177 fdp = (int *)CMSG_DATA(cm); 1178 rp = (struct file **)CMSG_DATA(cm); 1179 for (i = 0; i < newfds; i++) { 1180 if (fdalloc(p, 0, &f)) 1181 panic("unp_externalize"); 1182 fp = *rp++; 1183 unp_fp_externalize(lp, fp, f); 1184 *fdp++ = f; 1185 } 1186 } else { 1187 fdp = (int *)CMSG_DATA(cm) + newfds - 1; 1188 rp = (struct file **)CMSG_DATA(cm) + newfds - 1; 1189 for (i = 0; i < newfds; i++) { 1190 if (fdalloc(p, 0, &f)) 1191 panic("unp_externalize"); 1192 fp = *rp--; 1193 unp_fp_externalize(lp, fp, f); 1194 *fdp-- = f; 1195 } 1196 } 1197 1198 /* 1199 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1200 * differs. 
1201 */ 1202 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); 1203 rights->m_len = cm->cmsg_len; 1204 1205 lwkt_reltoken(&unp_token); 1206 return (0); 1207 } 1208 1209 static void 1210 unp_fp_externalize(struct lwp *lp, struct file *fp, int fd) 1211 { 1212 struct file *fx; 1213 int error; 1214 1215 lwkt_gettoken(&unp_token); 1216 1217 if (lp) { 1218 KKASSERT(fd >= 0); 1219 if (fp->f_flag & FREVOKED) { 1220 kprintf("Warning: revoked fp exiting unix socket\n"); 1221 fx = NULL; 1222 error = falloc(lp, &fx, NULL); 1223 if (error == 0) 1224 fsetfd(lp->lwp_proc->p_fd, fx, fd); 1225 else 1226 fsetfd(lp->lwp_proc->p_fd, NULL, fd); 1227 fdrop(fx); 1228 } else { 1229 fsetfd(lp->lwp_proc->p_fd, fp, fd); 1230 } 1231 } 1232 spin_lock(&unp_spin); 1233 fp->f_msgcount--; 1234 unp_rights--; 1235 spin_unlock(&unp_spin); 1236 fdrop(fp); 1237 1238 lwkt_reltoken(&unp_token); 1239 } 1240 1241 1242 void 1243 unp_init(void) 1244 { 1245 LIST_INIT(&unp_dhead); 1246 LIST_INIT(&unp_shead); 1247 spin_init(&unp_spin); 1248 } 1249 1250 static int 1251 unp_internalize(struct mbuf *control, struct thread *td) 1252 { 1253 struct proc *p = td->td_proc; 1254 struct filedesc *fdescp; 1255 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1256 struct file **rp; 1257 struct file *fp; 1258 int i, fd, *fdp; 1259 struct cmsgcred *cmcred; 1260 int oldfds; 1261 u_int newlen; 1262 int error; 1263 1264 KKASSERT(p); 1265 lwkt_gettoken(&unp_token); 1266 1267 fdescp = p->p_fd; 1268 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 1269 cm->cmsg_level != SOL_SOCKET || 1270 CMSG_ALIGN(cm->cmsg_len) != control->m_len) { 1271 error = EINVAL; 1272 goto done; 1273 } 1274 1275 /* 1276 * Fill in credential information. 
1277 */ 1278 if (cm->cmsg_type == SCM_CREDS) { 1279 cmcred = (struct cmsgcred *)CMSG_DATA(cm); 1280 cmcred->cmcred_pid = p->p_pid; 1281 cmcred->cmcred_uid = p->p_ucred->cr_ruid; 1282 cmcred->cmcred_gid = p->p_ucred->cr_rgid; 1283 cmcred->cmcred_euid = p->p_ucred->cr_uid; 1284 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 1285 CMGROUP_MAX); 1286 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1287 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 1288 error = 0; 1289 goto done; 1290 } 1291 1292 /* 1293 * cmsghdr may not be aligned, do not allow calculation(s) to 1294 * go negative. 1295 */ 1296 if (cm->cmsg_len < CMSG_LEN(0)) { 1297 error = EINVAL; 1298 goto done; 1299 } 1300 1301 oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int); 1302 1303 /* 1304 * check that all the FDs passed in refer to legal OPEN files 1305 * If not, reject the entire operation. 1306 */ 1307 fdp = (int *)CMSG_DATA(cm); 1308 for (i = 0; i < oldfds; i++) { 1309 fd = *fdp++; 1310 if ((unsigned)fd >= fdescp->fd_nfiles || 1311 fdescp->fd_files[fd].fp == NULL) { 1312 error = EBADF; 1313 goto done; 1314 } 1315 if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) { 1316 error = EOPNOTSUPP; 1317 goto done; 1318 } 1319 } 1320 /* 1321 * Now replace the integer FDs with pointers to 1322 * the associated global file table entry.. 1323 * Allocate a bigger buffer as necessary. But if an cluster is not 1324 * enough, return E2BIG. 
1325 */ 1326 newlen = CMSG_LEN(oldfds * sizeof(struct file *)); 1327 if (newlen > MCLBYTES) { 1328 error = E2BIG; 1329 goto done; 1330 } 1331 if (newlen - control->m_len > M_TRAILINGSPACE(control)) { 1332 if (control->m_flags & M_EXT) { 1333 error = E2BIG; 1334 goto done; 1335 } 1336 MCLGET(control, MB_WAIT); 1337 if (!(control->m_flags & M_EXT)) { 1338 error = ENOBUFS; 1339 goto done; 1340 } 1341 1342 /* copy the data to the cluster */ 1343 memcpy(mtod(control, char *), cm, cm->cmsg_len); 1344 cm = mtod(control, struct cmsghdr *); 1345 } 1346 1347 /* 1348 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1349 * differs. 1350 */ 1351 cm->cmsg_len = newlen; 1352 control->m_len = CMSG_ALIGN(newlen); 1353 1354 /* 1355 * Transform the file descriptors into struct file pointers. 1356 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1357 * then do it in reverse order so that the int won't get until 1358 * we're done. 1359 * If sizeof (struct file *) is smaller than sizeof int, then 1360 * do it in forward order. 1361 */ 1362 if (sizeof (struct file *) >= sizeof (int)) { 1363 fdp = (int *)CMSG_DATA(cm) + oldfds - 1; 1364 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; 1365 for (i = 0; i < oldfds; i++) { 1366 fp = fdescp->fd_files[*fdp--].fp; 1367 *rp-- = fp; 1368 fhold(fp); 1369 spin_lock(&unp_spin); 1370 fp->f_msgcount++; 1371 unp_rights++; 1372 spin_unlock(&unp_spin); 1373 } 1374 } else { 1375 fdp = (int *)CMSG_DATA(cm); 1376 rp = (struct file **)CMSG_DATA(cm); 1377 for (i = 0; i < oldfds; i++) { 1378 fp = fdescp->fd_files[*fdp++].fp; 1379 *rp++ = fp; 1380 fhold(fp); 1381 spin_lock(&unp_spin); 1382 fp->f_msgcount++; 1383 unp_rights++; 1384 spin_unlock(&unp_spin); 1385 } 1386 } 1387 error = 0; 1388 done: 1389 lwkt_reltoken(&unp_token); 1390 return error; 1391 } 1392 1393 /* 1394 * Garbage collect in-transit file descriptors that get lost due to 1395 * loops (i.e. 
when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

/*
 * Scratch state shared between unp_gc() and its allfiles scan callbacks.
 */
struct unp_gc_info {
	struct file **extra_ref;	/* files collected by unp_gc_checkrefs */
	struct file *locked_fp;		/* file unp_gc_checkmarks is scanning */
	int defer;			/* work left over for another mark pass */
	int index;			/* number of entries used in extra_ref */
	int maxindex;			/* capacity of extra_ref */
};

/*
 * Run one garbage collection over all files.
 *
 * Phase 1 clears FMARK/FDEFER everywhere and then repeats the marking
 * scan (unp_gc_checkmarks) until a pass leaves nothing deferred.
 * Phase 2 collects, in batches of up to maxindex files, every file whose
 * references all come from in-transit messages and which was not marked;
 * sockets among them are flushed with sorflush() and each collected file
 * is then closed with closef().
 *
 * Returns immediately if another collection is already in progress.
 */
static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT deferred
	 * and NOT externally accessible (not marked).  During the scan
	 * a fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
		/* a full batch means the scan was cut short; go around again */
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	/*
	 * NOTE(review): unp_gcing is set under unp_spin above but cleared
	 * here without it -- confirm that is intended.
	 */
	unp_gcing = FALSE;
}

/*
 * Scan callback for the sweep phase: collect fp into info->extra_ref
 * (taking a reference) when every reference to it comes from in-transit
 * messages and it was not marked reachable.  Returns -1 once the array
 * is full, 0 otherwise.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * Scan callback: clear the FMARK and FDEFER flags on fp.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}

/*
 * Scan callback for the mark phase: flag fp with FMARK when it has a
 * reference that does not come from an in-transit message, and, if it is
 * a local-domain socket with PR_RIGHTS, mark the files queued on its
 * receive buffer as well.  Work that cannot be done now is flagged
 * FDEFER and counted in info->defer.  Always returns 0.
 *
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * deferred or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * if it's not deferred, then check if it's
		 * already marked.. if so skip it
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it. it's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was deferred).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here, if we cannot get the
	 * token non-blocking defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

/* Number of revoked files handled per scan pass (size of fary[]). */
#define REVOKE_GC_MAXFILES	32

/*
 * Scratch state shared between unp_revoke_gc() and its scan callback.
 */
struct unp_revoke_gc_info {
	struct file	*fx;		/* replacement for revoked files */
	struct file	*fary[REVOKE_GC_MAXFILES]; /* revoked files removed */
	int		fcount;		/* entries used in fary[] */
};

/*
 * Replace every revoked file pointer queued on a unix domain socket
 * with fx.  The scan is repeated for as long as a pass fills the whole
 * fary[] batch; after each pass the removed files are released through
 * unp_fp_externalize(NULL, fp, -1).
 */
void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * Scan callback: fps is the file being visited and vinfo points to the
 * struct unp_revoke_gc_info.  Returns -1 to stop the scan once fary[]
 * is full, 0 otherwise.
 *
 * WARNING: This routine is not allowed to block.
 *
 * NOTE(review): lwkt_gettoken() is used below where unp_gc_checkmarks
 * uses lwkt_trytoken() -- confirm this cannot block here.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					/*
					 * fx takes over the queue slot: give
					 * it a reference and in-transit
					 * accounting of its own.
					 *
					 * NOTE(review): the counters are
					 * updated without unp_spin here,
					 * unlike unp_discard() -- confirm.
					 */
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Discard every file pointer carried in rights messages on the mbuf
 * chain m.  A NULL m is accepted and ignored.
 */
void
unp_dispose(struct mbuf *m)
{
	lwkt_gettoken(&unp_token);
	if (m)
		unp_scan(m, unp_discard, NULL);
	lwkt_reltoken(&unp_token);
}

/*
 * Record the calling process's credentials in unp->unp_peercred and set
 * UNP_HAVEPCCACHED.  td->td_proc must be non-NULL.  Always returns 0.
 */
static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	lwkt_reltoken(&unp_token);
	return (0);
}

/*
 * Walk the packet list starting at m0 and call op(fp, data) for each
 * file pointer stored in a SOL_SOCKET/SCM_RIGHTS control message.  Only
 * the first control mbuf in each packet that is large enough to hold a
 * cmsghdr and carries rights is processed.
 */
static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				/* the payload is an array of file pointers */
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
					sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Mark visibility.  info->defer is recalculated on every pass.
 *
 * unp_scan() callback used by unp_gc_checkmarks: an unmarked file gets
 * FMARK | FDEFER; in that case, and when the file is already flagged
 * FDEFER, info->defer is incremented.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	} else if (fp->f_flag & FDEFER) {
		++info->defer;
	}
}

/*
 * unp_scan() callback: retire the in-transit accounting for fp
 * (f_msgcount and unp_rights, under unp_spin) and close it with closef().
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	closef(fp, NULL);
}