/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 * $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/domain.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/mbuf.h>
#include <sys/nlookup.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/un.h>
#include <sys/unpcb.h>
#include <sys/vnode.h>

#include <sys/file2.h>
#include <sys/spinlock2.h>
#include <sys/socketvar2.h>
#include <sys/msgport2.h>

#define UNP_ISATTACHED(unp)	((unp) != NULL)

typedef struct unp_defdiscard {
	struct unp_defdiscard *next;
	struct file *fp;
} *unp_defdiscard_t;

static MALLOC_DEFINE(M_UNPCB, "unpcb", "unpcb struct");
static unp_gen_t unp_gencnt;
static u_int unp_count;

static struct unp_head unp_shead, unp_dhead;

static struct lwkt_token unp_token = LWKT_TOKEN_INITIALIZER(unp_token);
static int unp_defdiscard_nest;
static unp_defdiscard_t unp_defdiscard_base;

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 *	lock pushdown
 */
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t unp_ino = 1;	/* prototype for fake inode numbers */
static struct spinlock unp_ino_spin = SPINLOCK_INITIALIZER(&unp_ino_spin, "unp_ino_spin");

static int     unp_attach (struct socket *, struct pru_attach_info *);
static void    unp_detach (struct unpcb *);
static int     unp_bind (struct unpcb *,struct sockaddr *, struct thread *);
static int     unp_connect (struct socket *,struct sockaddr *,
				struct thread *);
static void    unp_disconnect (struct unpcb *);
static void    unp_shutdown (struct unpcb *);
static void    unp_drop (struct unpcb *, int);
static void    unp_gc (void);
static int     unp_gc_clearmarks(struct file *, void *);
static int     unp_gc_checkmarks(struct file *, void *);
static int     unp_gc_checkrefs(struct file *, void *);
static int     unp_revoke_gc_check(struct file *, void *);
static void    unp_scan (struct mbuf *, void (*)(struct file *, void *),
				void *data);
static void    unp_mark (struct file *, void *data);
static void    unp_discard (struct file *, void *);
static int     unp_internalize (struct mbuf *, struct thread *);
static int     unp_listen (struct unpcb *, struct thread *);
static void    unp_fp_externalize(struct lwp *lp, struct file *fp, int fd);

/*
 * SMP Considerations:
 *
 *	Since unp_token will be automatically released upon execution of
 *	blocking code, we need to reference unp_conn before any possible
 *	blocking code to prevent it from being ripped out from behind our
 *	back.
 *
 *	Any adjustment to unp->unp_conn requires both the global unp_token
 *	AND the per-unp token (lwkt_token_pool_lookup(unp)) to be held.
 *
 *	Any access to so_pcb to obtain unp requires the pool token for
 *	unp to be held.
 */

/* NOTE: unp_token MUST be held */
static __inline void
unp_reference(struct unpcb *unp)
{
	atomic_add_int(&unp->unp_refcnt, 1);
}

/* NOTE: unp_token MUST be held */
static __inline void
unp_free(struct unpcb *unp)
{
	KKASSERT(unp->unp_refcnt > 0);
	if (atomic_fetchadd_int(&unp->unp_refcnt, -1) == 1)
		unp_detach(unp);
}

static __inline struct unpcb *
unp_getsocktoken(struct socket *so)
{
	struct unpcb *unp;

	/*
	 * The unp pointer is invalid until we verify that it is
	 * good by re-checking so_pcb AFTER obtaining the token.
	 */
	while ((unp = so->so_pcb) != NULL) {
		lwkt_getpooltoken(unp);
		if (unp == so->so_pcb)
			break;
		lwkt_relpooltoken(unp);
	}
	return unp;
}

static void
unp_reltoken(struct unpcb *unp)
{
	if (unp != NULL)
		lwkt_relpooltoken(unp);
}
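
/*
 * Illustrative sketch (kept out of the build): the canonical caller
 * pattern for the two helpers above.  A message handler revalidates
 * so_pcb under the pool token, does its work, and releases the token,
 * exactly as uipc_shutdown() and uipc_rcvd() do below.  The handler
 * name is hypothetical.
 */
#ifdef notdef
static int
uipc_example_handler(struct socket *so)
{
	struct unpcb *unp;
	int error;

	unp = unp_getsocktoken(so);	/* revalidates so->so_pcb */
	if (UNP_ISATTACHED(unp)) {
		/* operate on unp while the pool token is held */
		error = 0;
	} else {
		error = EINVAL;
	}
	unp_reltoken(unp);		/* NULL-safe */
	return (error);
}
#endif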

/*
 * NOTE: (so) is referenced from soabort*() and netmsg_pru_abort()
 *	 will sofree() it when we return.
 */
static void
uipc_abort(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (UNP_ISATTACHED(unp)) {
		unp_drop(unp, ECONNABORTED);
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);

	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_accept(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else {
		struct unpcb *unp2 = unp->unp_conn;

		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp2 && unp2->unp_addr) {
			unp_reference(unp2);
			*msg->accept.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
			unp_free(unp2);
		} else {
			*msg->accept.nm_nam = dup_sockaddr(&sun_noname);
		}
		error = 0;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_attach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	KASSERT(unp == NULL, ("double unp attach"));
	error = unp_attach(msg->base.nm_so, msg->attach.nm_ai);
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_bind(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (UNP_ISATTACHED(unp))
		error = unp_bind(unp, msg->bind.nm_nam, msg->bind.nm_td);
	else
		error = EINVAL;
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	unp = msg->base.nm_so->so_pcb;
	if (UNP_ISATTACHED(unp)) {
		error = unp_connect(msg->base.nm_so,
				    msg->connect.nm_nam,
				    msg->connect.nm_td);
	} else {
		error = EINVAL;
	}
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_connect2(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	unp = msg->connect2.nm_so1->so_pcb;
	if (UNP_ISATTACHED(unp)) {
		error = unp_connect2(msg->connect2.nm_so1,
				     msg->connect2.nm_so2);
	} else {
		error = EINVAL;
	}
	lwkt_replymsg(&msg->lmsg, error);
}

/* control is EOPNOTSUPP */

static void
uipc_detach(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (UNP_ISATTACHED(unp)) {
		unp_free(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_disconnect(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (UNP_ISATTACHED(unp)) {
		unp_disconnect(unp);
		error = 0;
	} else {
		error = EINVAL;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_listen(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (!UNP_ISATTACHED(unp) || unp->unp_vnode == NULL)
		error = EINVAL;
	else
		error = unp_listen(unp, msg->listen.nm_td);
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_peeraddr(netmsg_t msg)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);
	unp = msg->base.nm_so->so_pcb;
	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
	} else if (unp->unp_conn && unp->unp_conn->unp_addr) {
		struct unpcb *unp2 = unp->unp_conn;

		unp_reference(unp2);
		*msg->peeraddr.nm_nam = dup_sockaddr(
				(struct sockaddr *)unp2->unp_addr);
		unp_free(unp2);
		error = 0;
	} else {
		/*
		 * XXX: It seems that this test always fails even when
		 * connection is established.  So, this else clause is
		 * added as workaround to return PF_LOCAL sockaddr.
		 */
		*msg->peeraddr.nm_nam = dup_sockaddr(&sun_noname);
		error = 0;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_rcvd(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	switch (so->so_type) {
	case SOCK_DGRAM:
		panic("uipc_rcvd DGRAM?");
		/*NOTREACHED*/
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn == NULL)
			break;
		unp2 = unp->unp_conn;	/* protected by pool token */

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 *
		 * As in several places in this module we have to ref unp2
		 * to ensure that it does not get ripped out from under us
		 * if we block on the so2 token or in sowwakeup().
		 */
		so2 = unp2->unp_socket;
		unp_reference(unp2);
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
		    so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
		) {
			atomic_clear_int(&so2->so_snd.ssb_flags, SSB_STOP);

			sowwakeup(so2);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		unp_free(unp2);
		break;
	default:
		panic("uipc_rcvd unknown socktype");
		/*NOTREACHED*/
	}
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
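
/*
 * NOTE: uipc_rcvd() above is the consumer half of the SSB_STOP
 * backpressure handshake.  uipc_send() (below) sets SSB_STOP on the
 * sender when the peer's receive buffer crosses the sender's high-water
 * marks, and uipc_rcvd() clears the flag and wakes the writer once the
 * reader has drained back below them.
 */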

/* pru_rcvoob is EOPNOTSUPP */

static void
uipc_send(netmsg_t msg)
{
	struct unpcb *unp, *unp2;
	struct socket *so;
	struct socket *so2;
	struct mbuf *control;
	struct mbuf *m;
	int error = 0;

	so = msg->base.nm_so;
	control = msg->send.nm_control;
	m = msg->send.nm_m;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto release;
	}

	if (msg->send.nm_flags & PRUS_OOB) {
		error = EOPNOTSUPP;
		goto release;
	}

	wakeup_start_delayed();

	if (control && (error = unp_internalize(control, msg->send.nm_td)))
		goto release;

	switch (so->so_type) {
	case SOCK_DGRAM:
	{
		struct sockaddr *from;

		if (msg->send.nm_addr) {
			if (unp->unp_conn) {
				error = EISCONN;
				break;
			}
			error = unp_connect(so,
					    msg->send.nm_addr,
					    msg->send.nm_td);
			if (error)
				break;
		} else {
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
		}
		unp2 = unp->unp_conn;
		so2 = unp2->unp_socket;
		if (unp->unp_addr)
			from = (struct sockaddr *)unp->unp_addr;
		else
			from = &sun_noname;

		unp_reference(unp2);

		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (ssb_appendaddr(&so2->so_rcv, from, m, control)) {
			sorwakeup(so2);
			m = NULL;
			control = NULL;
		} else {
			error = ENOBUFS;
		}
		if (msg->send.nm_addr)
			unp_disconnect(unp);
		lwkt_reltoken(&so2->so_rcv.ssb_token);

		unp_free(unp2);
		break;
	}

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		/* Connect if not connected yet. */
		/*
		 * Note: A better implementation would complain
		 * if not equal to the peer's address.
		 */
		if (!(so->so_state & SS_ISCONNECTED)) {
			if (msg->send.nm_addr) {
				error = unp_connect(so,
						    msg->send.nm_addr,
						    msg->send.nm_td);
				if (error)
					break;	/* XXX */
			} else {
				error = ENOTCONN;
				break;
			}
		}

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == NULL)
			panic("uipc_send connected but no connection?");
		unp2 = unp->unp_conn;
		so2 = unp2->unp_socket;

		unp_reference(unp2);

		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		lwkt_gettoken(&so2->so_rcv.ssb_token);
		if (control) {
			if (ssb_appendcontrol(&so2->so_rcv, m, control)) {
				control = NULL;
				m = NULL;
			}
		} else if (so->so_type == SOCK_SEQPACKET) {
			sbappendrecord(&so2->so_rcv.sb, m);
			m = NULL;
		} else {
			sbappend(&so2->so_rcv.sb, m);
			m = NULL;
		}

		/*
		 * Because we are transferring mbufs directly to the
		 * peer socket we have to use SSB_STOP on the sender
		 * to prevent it from building up infinite mbufs.
		 */
		if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
		    so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
		) {
			atomic_set_int(&so->so_snd.ssb_flags, SSB_STOP);
		}
		lwkt_reltoken(&so2->so_rcv.ssb_token);
		sorwakeup(so2);

		unp_free(unp2);
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by a SHUTDOWN.
	 */
	if (msg->send.nm_flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}

	if (control && error != 0)
		unp_dispose(control);
release:
	unp_reltoken(unp);
	wakeup_end_delayed();

	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	lwkt_replymsg(&msg->lmsg, error);
}

/*
 * MPSAFE
 */
static void
uipc_sense(netmsg_t msg)
{
	struct unpcb *unp;
	struct socket *so;
	struct stat *sb;
	int error;

	so = msg->base.nm_so;
	sb = msg->sense.nm_stat;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	unp = unp_getsocktoken(so);

	if (!UNP_ISATTACHED(unp)) {
		error = EINVAL;
		goto done;
	}

	sb->st_blksize = so->so_snd.ssb_hiwat;
	sb->st_dev = NOUDEV;
	if (unp->unp_ino == 0) {	/* make up a non-zero inode number */
		spin_lock(&unp_ino_spin);
		unp->unp_ino = unp_ino++;
		spin_unlock(&unp_ino_spin);
	}
	sb->st_ino = unp->unp_ino;
	error = 0;
done:
	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

static void
uipc_shutdown(netmsg_t msg)
{
	struct socket *so;
	struct unpcb *unp;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (UNP_ISATTACHED(unp)) {
		socantsendmore(so);
		unp_shutdown(unp);
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}
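
/*
 * Userland view of uipc_sense() above (sketch, not compiled here):
 * fstat(2) on a unix domain socket reports st_blksize from the send
 * buffer high-water mark and a fabricated, stable, non-zero st_ino.
 *
 *	struct stat st;
 *	int s = socket(AF_LOCAL, SOCK_STREAM, 0);
 *
 *	if (s >= 0 && fstat(s, &st) == 0)
 *		printf("blksize %ld ino %ju\n",
 *		       (long)st.st_blksize, (uintmax_t)st.st_ino);
 */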
static void
uipc_sockaddr(netmsg_t msg)
{
	struct socket *so;
	struct unpcb *unp;
	int error;

	/*
	 * so_pcb is only modified with both the global and the unp
	 * pool token held.
	 */
	so = msg->base.nm_so;
	unp = unp_getsocktoken(so);

	if (UNP_ISATTACHED(unp)) {
		if (unp->unp_addr) {
			*msg->sockaddr.nm_nam =
				dup_sockaddr((struct sockaddr *)unp->unp_addr);
		}
		error = 0;
	} else {
		error = EINVAL;
	}

	unp_reltoken(unp);
	lwkt_replymsg(&msg->lmsg, error);
}

struct pr_usrreqs uipc_usrreqs = {
	.pru_abort = uipc_abort,
	.pru_accept = uipc_accept,
	.pru_attach = uipc_attach,
	.pru_bind = uipc_bind,
	.pru_connect = uipc_connect,
	.pru_connect2 = uipc_connect2,
	.pru_control = pr_generic_notsupp,
	.pru_detach = uipc_detach,
	.pru_disconnect = uipc_disconnect,
	.pru_listen = uipc_listen,
	.pru_peeraddr = uipc_peeraddr,
	.pru_rcvd = uipc_rcvd,
	.pru_rcvoob = pr_generic_notsupp,
	.pru_send = uipc_send,
	.pru_sense = uipc_sense,
	.pru_shutdown = uipc_shutdown,
	.pru_sockaddr = uipc_sockaddr,
	.pru_sosend = sosend,
	.pru_soreceive = soreceive
};

void
uipc_ctloutput(netmsg_t msg)
{
	struct socket *so;
	struct sockopt *sopt;
	struct unpcb *unp;
	int error = 0;

	lwkt_gettoken(&unp_token);
	so = msg->base.nm_so;
	sopt = msg->ctloutput.nm_sopt;
	unp = so->so_pcb;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			if (unp->unp_flags & UNP_HAVEPC)
				soopt_from_kbuf(sopt, &unp->unp_peercred,
						sizeof(unp->unp_peercred));
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else if (so->so_type == SOCK_SEQPACKET)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		break;
	case SOPT_SET:
	default:
		error = EOPNOTSUPP;
		break;
	}
	lwkt_reltoken(&unp_token);
	lwkt_replymsg(&msg->lmsg, error);
}
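
/*
 * Userland usage of the LOCAL_PEERCRED path in uipc_ctloutput() above
 * (sketch, not compiled here).  After connect(2) or accept(2) on an
 * AF_LOCAL stream or seqpacket socket, the credentials cached at
 * connect time can be read back with getsockopt(2); LOCAL_PEERCRED and
 * struct xucred come from <sys/un.h> and <sys/ucred.h> on the userland
 * side, and s is an assumed connected descriptor.
 *
 *	struct xucred xuc;
 *	socklen_t len = sizeof(xuc);
 *
 *	if (getsockopt(s, 0, LOCAL_PEERCRED, &xuc, &len) == 0)
 *		printf("peer euid %d\n", (int)xuc.cr_uid);
 */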

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 *
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 *
 * We want the local send/recv space to be significantly larger than lo0's
 * mtu of 16384.
 */
#ifndef PIPSIZ
#define	PIPSIZ	57344
#endif
static u_long unpst_sendspace = PIPSIZ;
static u_long unpst_recvspace = PIPSIZ;
static u_long unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long unpdg_recvspace = 4*1024;

static int unp_rights;		/* file descriptors in flight */
static struct spinlock unp_spin = SPINLOCK_INITIALIZER(&unp_spin, "unp_spin");

SYSCTL_DECL(_net_local_seqpacket);
SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
    &unpst_sendspace, 0, "Size of stream socket send buffer");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpst_recvspace, 0, "Size of stream socket receive buffer");

SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
    &unpdg_sendspace, 0, "Max datagram socket size");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
    &unpdg_recvspace, 0, "Size of datagram socket receive buffer");

SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
    "File descriptors in flight");

static int
unp_attach(struct socket *so, struct pru_attach_info *ai)
{
	struct unpcb *unp;
	int error;

	lwkt_gettoken(&unp_token);

	if (so->so_snd.ssb_hiwat == 0 || so->so_rcv.ssb_hiwat == 0) {
		switch (so->so_type) {
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace,
					  ai->sb_rlimit);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace,
					  ai->sb_rlimit);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			goto failed;
	}

	/*
	 * In order to support sendfile we have to set either SSB_STOPSUPP
	 * or SSB_PREALLOC.  Unix domain sockets use the SSB_STOP flow
	 * control mechanism.
	 */
	if (so->so_type == SOCK_STREAM) {
		atomic_set_int(&so->so_rcv.ssb_flags, SSB_STOPSUPP);
		atomic_set_int(&so->so_snd.ssb_flags, SSB_STOPSUPP);
	}

	unp = kmalloc(sizeof(*unp), M_UNPCB, M_WAITOK | M_ZERO | M_NULLOK);
	if (unp == NULL) {
		error = ENOBUFS;
		goto failed;
	}
	unp->unp_refcnt = 1;
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;
	unp->unp_rvnode = ai->fd_rdir;		/* jail cruft XXX JH */
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
			 : &unp_shead, unp, unp_link);
	so->so_pcb = (caddr_t)unp;
	soreference(so);
	error = 0;
failed:
	lwkt_reltoken(&unp_token);
	return error;
}

static void
unp_detach(struct unpcb *unp)
{
	struct socket *so;

	lwkt_gettoken(&unp_token);
	lwkt_getpooltoken(unp);

	LIST_REMOVE(unp, unp_link);	/* both tokens required */
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (!LIST_EMPTY(&unp->unp_refs))
		unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(unp->unp_socket);
	so = unp->unp_socket;
	soreference(so);		/* for delayed sorflush */
	KKASSERT(so->so_pcb == unp);
	so->so_pcb = NULL;		/* both tokens required */
	unp->unp_socket = NULL;
	sofree(so);			/* remove pcb ref */

	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(so);
		unp_gc();
	}
	sofree(so);
	lwkt_relpooltoken(unp);
	lwkt_reltoken(&unp_token);

	if (unp->unp_addr)
		kfree(unp->unp_addr, M_SONAME);
	kfree(unp, M_UNPCB);
}

static int
unp_bind(struct unpcb *unp, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct vattr vattr;
	int error, namelen;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	lwkt_gettoken(&unp_token);
	if (unp->unp_vnode != NULL) {
		error = EINVAL;
		goto failed;
	}
	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0) {
		error = EINVAL;
		goto failed;
	}
	strncpy(buf, soun->sun_path, namelen);
	buf[namelen] = 0;	/* null-terminate the string */
	error = nlookup_init(&nd, buf, UIO_SYSSPACE,
			     NLC_LOCKVP | NLC_CREATE | NLC_REFDVP);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_nch.ncp->nc_vp != NULL)
		error = EADDRINUSE;
	if (error)
		goto done;

	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
	error = VOP_NCREATE(&nd.nl_nch, nd.nl_dvp, &vp, nd.nl_cred, &vattr);
	if (error == 0) {
		if (unp->unp_vnode == NULL) {
			vp->v_socket = unp->unp_socket;
			unp->unp_vnode = vp;
			unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam);
			vn_unlock(vp);
		} else {
			vput(vp);	/* late race */
			error = EINVAL;
		}
	}
done:
	nlookup_done(&nd);
failed:
	lwkt_reltoken(&unp_token);
	return (error);
}

static int
unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error, len;
	struct nlookupdata nd;
	char buf[SOCK_MAXADDRLEN];

	lwkt_gettoken(&unp_token);

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0) {
		error = EINVAL;
		goto failed;
	}
	strncpy(buf, soun->sun_path, len);
	buf[len] = 0;

	vp = NULL;
	error = nlookup_init(&nd, buf, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error)
		goto failed;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_EACCESS(vp, VWRITE, p->p_ucred);
	if (error)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if (!(so2->so_options & SO_ACCEPTCONN) ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = so->so_pcb;
		if (unp->unp_conn) {	/* race, already connected! */
			error = EISCONN;
			sofree(so3);
			goto bad;
		}
		unp2 = so2->so_pcb;
		unp3 = so3->so_pcb;
		if (unp2->unp_addr)
			unp3->unp_addr = (struct sockaddr_un *)
				dup_sockaddr((struct sockaddr *)unp2->unp_addr);

		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(p->p_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
			("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		       sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;

		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
failed:
	lwkt_reltoken(&unp_token);
	return (error);
}

/*
 * Connect two unix domain sockets together.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp;
	struct unpcb *unp2;

	lwkt_gettoken(&unp_token);
	if (so2->so_type != so->so_type) {
		lwkt_reltoken(&unp_token);
		return (EPROTOTYPE);
	}
	unp = unp_getsocktoken(so);
	unp2 = unp_getsocktoken(so2);

	unp->unp_conn = unp2;

	switch (so->so_type) {
	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}

	unp_reltoken(unp2);
	unp_reltoken(unp);
	lwkt_reltoken(&unp_token);
	return (0);
}
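
/*
 * Userland counterpart of unp_bind()/unp_connect()/unp_connect2() above
 * (sketch, not compiled here).  bind(2) creates the VSOCK vnode at the
 * given path and connect(2) resolves the path back to the listening
 * socket; "/tmp/example.sock" is a made-up path.
 *
 *	struct sockaddr_un sun;
 *	int s = socket(AF_LOCAL, SOCK_STREAM, 0);
 *
 *	bzero(&sun, sizeof(sun));
 *	sun.sun_family = AF_LOCAL;
 *	strncpy(sun.sun_path, "/tmp/example.sock",
 *		sizeof(sun.sun_path) - 1);
 *	sun.sun_len = SUN_LEN(&sun);
 *	if (connect(s, (struct sockaddr *)&sun, sizeof(sun)) != 0)
 *		err(1, "connect");
 */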

/*
 * Disconnect a unix domain socket pair.
 *
 * NOTE: Semantics for any change to unp_conn requires that the per-unp
 *	 pool token also be held.
 */
static void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2;

	lwkt_gettoken(&unp_token);
	lwkt_getpooltoken(unp);

	while ((unp2 = unp->unp_conn) != NULL) {
		lwkt_getpooltoken(unp2);
		if (unp2 == unp->unp_conn)
			break;
		lwkt_relpooltoken(unp2);
	}
	if (unp2 == NULL)
		goto done;

	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {
	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		soclrstate(unp->unp_socket, SS_ISCONNECTED);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp_reference(unp2);
		unp2->unp_conn = NULL;

		soisdisconnected(unp->unp_socket);
		soisdisconnected(unp2->unp_socket);

		unp_free(unp2);
		break;
	}
	lwkt_relpooltoken(unp2);
done:
	lwkt_relpooltoken(unp);
	lwkt_reltoken(&unp_token);
}

#ifdef notdef
void
unp_abort(struct unpcb *unp)
{
	lwkt_gettoken(&unp_token);
	unp_free(unp);
	lwkt_reltoken(&unp_token);
}
#endif

static int
prison_unpcb(struct thread *td, struct unpcb *unp)
{
	struct proc *p;

	if (td == NULL)
		return (0);
	if ((p = td->td_proc) == NULL)
		return (0);
	if (!p->p_ucred->cr_prison)
		return (0);
	if (p->p_fd->fd_rdir == unp->unp_rvnode)
		return (0);
	return (1);
}

static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct unp_head *head;

	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	KKASSERT(curproc != NULL);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = unp_count;
		req->oldidx = (n + n/8) * sizeof(struct xunpcb);
		return 0;
	}

	if (req->newptr != NULL)
		return EPERM;

	lwkt_gettoken(&unp_token);

	/*
	 * OK, now we're committed to doing something.
	 */
	gencnt = unp_gencnt;
	n = unp_count;

	unp_list = kmalloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->td, unp))
			unp_list[i++] = unp;
	}
	n = i;			/* in case we lost some during malloc */

	error = 0;
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			struct xunpcb xu;
			xu.xu_len = sizeof xu;
			xu.xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr)
				bcopy(unp->unp_addr, &xu.xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn && unp->unp_conn->unp_addr)
				bcopy(unp->unp_conn->unp_addr,
				      &xu.xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu.xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu.xu_socket);
			error = SYSCTL_OUT(req, &xu, sizeof xu);
		}
	}
	lwkt_reltoken(&unp_token);
	kfree(unp_list, M_TEMP);

	return error;
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");
SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
	    "List of active local seqpacket stream sockets");

static void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	if ((unp->unp_socket->so_type == SOCK_STREAM ||
	     unp->unp_socket->so_type == SOCK_SEQPACKET) &&
	    unp->unp_conn != NULL && (so = unp->unp_conn->unp_socket)) {
		socantrcvmore(so);
	}
}

static void
unp_drop(struct unpcb *unp, int err)
{
	struct socket *so = unp->unp_socket;

	so->so_error = err;
	unp_disconnect(unp);
}

#ifdef notdef
void
unp_drain(void)
{
	lwkt_gettoken(&unp_token);
	lwkt_reltoken(&unp_token);
}
#endif

int
unp_externalize(struct mbuf *rights)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;		/* XXX */
	struct lwp *lp = td->td_lwp;
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int *fdp;
	int i;
	struct file **rp;
	struct file *fp;
	int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm))
		/ sizeof (struct file *);
	int f;

	lwkt_gettoken(&unp_token);

	/*
	 * if the new FD's will not fit, then we free them all
	 */
	if (!fdavail(p, newfds)) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = NULL;
			unp_discard(fp, NULL);
		}
		lwkt_reltoken(&unp_token);
		return (EMSGSIZE);
	}

	/*
	 * now change each pointer to an fd in the global table to
	 * an integer that is the index to the local fd table entry
	 * that we set up to point to the global one we are transferring.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in forward order.  In that case, an integer will
	 * always come in the same place or before its corresponding
	 * struct file pointer.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in reverse order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp++;
			unp_fp_externalize(lp, fp, f);
			*fdp++ = f;
		}
	} else {
		fdp = (int *)CMSG_DATA(cm) + newfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + newfds - 1;
		for (i = 0; i < newfds; i++) {
			if (fdalloc(p, 0, &f))
				panic("unp_externalize");
			fp = *rp--;
			unp_fp_externalize(lp, fp, f);
			*fdp-- = f;
		}
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = CMSG_LEN(newfds * sizeof(int));
	rights->m_len = cm->cmsg_len;

	lwkt_reltoken(&unp_token);
	return (0);
}

static void
unp_fp_externalize(struct lwp *lp, struct file *fp, int fd)
{
	struct file *fx;
	int error;

	lwkt_gettoken(&unp_token);

	if (lp) {
		KKASSERT(fd >= 0);
		if (fp->f_flag & FREVOKED) {
			kprintf("Warning: revoked fp exiting unix socket\n");
			fx = NULL;
			error = falloc(lp, &fx, NULL);
			if (error == 0)
				fsetfd(lp->lwp_proc->p_fd, fx, fd);
			else
				fsetfd(lp->lwp_proc->p_fd, NULL, fd);
			fdrop(fx);
		} else {
			fsetfd(lp->lwp_proc->p_fd, fp, fd);
		}
	}
	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);
	fdrop(fp);

	lwkt_reltoken(&unp_token);
}

void
unp_init(void)
{
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);
	spin_init(&unp_spin, "unpinit");
}

static int
unp_internalize(struct mbuf *control, struct thread *td)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdescp;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp;
	struct file *fp;
	int i, fd, *fdp;
	struct cmsgcred *cmcred;
	int oldfds;
	u_int newlen;
	int error;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);

	fdescp = p->p_fd;
	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
	    cm->cmsg_level != SOL_SOCKET ||
	    CMSG_ALIGN(cm->cmsg_len) != control->m_len) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Fill in credential information.
	 */
	if (cm->cmsg_type == SCM_CREDS) {
		cmcred = (struct cmsgcred *)CMSG_DATA(cm);
		cmcred->cmcred_pid = p->p_pid;
		cmcred->cmcred_uid = p->p_ucred->cr_ruid;
		cmcred->cmcred_gid = p->p_ucred->cr_rgid;
		cmcred->cmcred_euid = p->p_ucred->cr_uid;
		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
					     CMGROUP_MAX);
		for (i = 0; i < cmcred->cmcred_ngroups; i++)
			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
		error = 0;
		goto done;
	}

	/*
	 * cmsghdr may not be aligned, do not allow calculation(s) to
	 * go negative.
	 */
	if (cm->cmsg_len < CMSG_LEN(0)) {
		error = EINVAL;
		goto done;
	}

	oldfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof (int);

	/*
	 * check that all the FDs passed in refer to legal OPEN files
	 * If not, reject the entire operation.
	 */
	fdp = (int *)CMSG_DATA(cm);
	for (i = 0; i < oldfds; i++) {
		fd = *fdp++;
		if ((unsigned)fd >= fdescp->fd_nfiles ||
		    fdescp->fd_files[fd].fp == NULL) {
			error = EBADF;
			goto done;
		}
		if (fdescp->fd_files[fd].fp->f_type == DTYPE_KQUEUE) {
			error = EOPNOTSUPP;
			goto done;
		}
	}
	/*
	 * Now replace the integer FDs with pointers to
	 * the associated global file table entry..
	 * Allocate a bigger buffer as necessary.  But if a cluster is not
	 * enough, return E2BIG.
	 */
	newlen = CMSG_LEN(oldfds * sizeof(struct file *));
	if (newlen > MCLBYTES) {
		error = E2BIG;
		goto done;
	}
	if (newlen - control->m_len > M_TRAILINGSPACE(control)) {
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto done;
		}
		MCLGET(control, M_WAITOK);
		if (!(control->m_flags & M_EXT)) {
			error = ENOBUFS;
			goto done;
		}

		/* copy the data to the cluster */
		memcpy(mtod(control, char *), cm, cm->cmsg_len);
		cm = mtod(control, struct cmsghdr *);
	}

	/*
	 * Adjust length, in case sizeof(struct file *) and sizeof(int)
	 * differs.
	 */
	cm->cmsg_len = newlen;
	control->m_len = CMSG_ALIGN(newlen);

	/*
	 * Transform the file descriptors into struct file pointers.
	 * If sizeof (struct file *) is bigger than or equal to sizeof int,
	 * then do it in reverse order so that the int won't get trashed
	 * until we're done.
	 * If sizeof (struct file *) is smaller than sizeof int, then
	 * do it in forward order.
	 */
	if (sizeof (struct file *) >= sizeof (int)) {
		fdp = (int *)CMSG_DATA(cm) + oldfds - 1;
		rp = (struct file **)CMSG_DATA(cm) + oldfds - 1;
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp--].fp;
			*rp-- = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	} else {
		fdp = (int *)CMSG_DATA(cm);
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < oldfds; i++) {
			fp = fdescp->fd_files[*fdp++].fp;
			*rp++ = fp;
			fhold(fp);
			spin_lock(&unp_spin);
			fp->f_msgcount++;
			unp_rights++;
			spin_unlock(&unp_spin);
		}
	}
	error = 0;
done:
	lwkt_reltoken(&unp_token);
	return error;
}
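
/*
 * Userland view of the SCM_RIGHTS machinery (sketch, not compiled
 * here): unp_internalize() above turns the ints in the control message
 * into struct file pointers on sendmsg(2), and unp_externalize() turns
 * them back into fresh descriptor numbers in the receiver on
 * recvmsg(2).  s and fd_to_pass are assumed descriptors.
 *
 *	union {
 *		struct cmsghdr cm;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} u;
 *	struct msghdr mh;
 *	struct iovec iov;
 *	char c = 0;
 *
 *	iov.iov_base = &c;
 *	iov.iov_len = 1;
 *	bzero(&mh, sizeof(mh));
 *	mh.msg_iov = &iov;
 *	mh.msg_iovlen = 1;
 *	mh.msg_control = u.buf;
 *	mh.msg_controllen = CMSG_SPACE(sizeof(int));
 *	u.cm.cmsg_len = CMSG_LEN(sizeof(int));
 *	u.cm.cmsg_level = SOL_SOCKET;
 *	u.cm.cmsg_type = SCM_RIGHTS;
 *	*(int *)CMSG_DATA(&u.cm) = fd_to_pass;
 *	sendmsg(s, &mh, 0);
 *
 * The receiver finds the new descriptor in the same cmsg slot after
 * recvmsg(2).
 */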

/*
 * Garbage collect in-transit file descriptors that get lost due to
 * loops (i.e. when a socket is sent to another process over itself,
 * and more complex situations).
 *
 * NOT MPSAFE - TODO socket flush code and maybe closef.  Rest is MPSAFE.
 */

struct unp_gc_info {
	struct file **extra_ref;
	struct file *locked_fp;
	int defer;
	int index;
	int maxindex;
};

static void
unp_gc(void)
{
	struct unp_gc_info info;
	static boolean_t unp_gcing;
	struct file **fpp;
	int i;

	/*
	 * Only one gc can be in-progress at any given moment
	 */
	spin_lock(&unp_spin);
	if (unp_gcing) {
		spin_unlock(&unp_spin);
		return;
	}
	unp_gcing = TRUE;
	spin_unlock(&unp_spin);

	lwkt_gettoken(&unp_token);

	/*
	 * Before going through all this, set all FDs to be NOT deferred
	 * and NOT externally accessible (not marked).  During the scan
	 * an fd can be marked externally accessible but we may or may not
	 * be able to immediately process it (controlled by FDEFER).
	 *
	 * If we loop, sleep a bit.  The complexity of the topology can cause
	 * multiple loops.  Also failure to acquire the socket's so_rcv
	 * token can cause us to loop.
	 */
	allfiles_scan_exclusive(unp_gc_clearmarks, NULL);
	do {
		info.defer = 0;
		allfiles_scan_exclusive(unp_gc_checkmarks, &info);
		if (info.defer)
			tsleep(&info, 0, "gcagain", 1);
	} while (info.defer);

	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code unp_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.  Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	info.extra_ref = kmalloc(256 * sizeof(struct file *), M_FILE, M_WAITOK);
	info.maxindex = 256;

	do {
		/*
		 * Look for matches
		 */
		info.index = 0;
		allfiles_scan_exclusive(unp_gc_checkrefs, &info);

		/*
		 * For each FD on our hit list, do the following two things
		 */
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp) {
			struct file *tfp = *fpp;
			if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
				sorflush((struct socket *)(tfp->f_data));
		}
		for (i = info.index, fpp = info.extra_ref; --i >= 0; ++fpp)
			closef(*fpp, NULL);
	} while (info.index == info.maxindex);

	lwkt_reltoken(&unp_token);

	kfree((caddr_t)info.extra_ref, M_FILE);
	unp_gcing = FALSE;
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkrefs(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if (fp->f_count == 0)
		return(0);
	if (info->index == info->maxindex)
		return(-1);

	/*
	 * If all refs are from msgs, and it's not marked accessible
	 * then it must be referenced from some unreachable cycle
	 * of (shut-down) FDs, so include it in our
	 * list of FDs to remove
	 */
	if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
		info->extra_ref[info->index++] = fp;
		fhold(fp);
	}
	return(0);
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_clearmarks(struct file *fp, void *data __unused)
{
	atomic_clear_int(&fp->f_flag, FMARK | FDEFER);
	return(0);
}

/*
 * MPSAFE - NOTE: filehead list and file pointer spinlocked on entry
 */
static int
unp_gc_checkmarks(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;
	struct socket *so;

	/*
	 * If the file is not open, skip it.  Make sure it isn't marked
	 * deferred or we could loop forever, in case we somehow race
	 * something.
	 */
	if (fp->f_count == 0) {
		if (fp->f_flag & FDEFER)
			atomic_clear_int(&fp->f_flag, FDEFER);
		return(0);
	}
	/*
	 * If we already marked it as 'defer' in a
	 * previous pass, then try to process it this time
	 * and un-mark it
	 */
	if (fp->f_flag & FDEFER) {
		atomic_clear_int(&fp->f_flag, FDEFER);
	} else {
		/*
		 * if it's not deferred, then check if it's
		 * already marked.. if so skip it
		 */
		if (fp->f_flag & FMARK)
			return(0);
		/*
		 * If all references are from messages
		 * in transit, then skip it. it's not
		 * externally accessible.
		 */
		if (fp->f_count == fp->f_msgcount)
			return(0);
		/*
		 * If it got this far then it must be
		 * externally accessible.
		 */
		atomic_set_int(&fp->f_flag, FMARK);
	}

	/*
	 * either it was deferred, or it is externally
	 * accessible and not already marked so.
	 * Now check if it is possibly one of OUR sockets.
	 */
	if (fp->f_type != DTYPE_SOCKET ||
	    (so = (struct socket *)fp->f_data) == NULL) {
		return(0);
	}
	if (so->so_proto->pr_domain != &localdomain ||
	    !(so->so_proto->pr_flags & PR_RIGHTS)) {
		return(0);
	}

	/*
	 * So, Ok, it's one of our sockets and it IS externally accessible
	 * (or was deferred).  Now we look to see if we hold any file
	 * descriptors in its message buffers.  Follow those links and mark
	 * them as accessible too.
	 *
	 * We are holding multiple spinlocks here; if we cannot get the
	 * token non-blocking, defer until the next loop.
	 */
	info->locked_fp = fp;
	if (lwkt_trytoken(&so->so_rcv.ssb_token)) {
		unp_scan(so->so_rcv.ssb_mb, unp_mark, info);
		lwkt_reltoken(&so->so_rcv.ssb_token);
	} else {
		atomic_set_int(&fp->f_flag, FDEFER);
		++info->defer;
	}
	return (0);
}

/*
 * Scan all unix domain sockets and replace any revoked file pointers
 * found with the dummy file pointer fx.  We don't worry about races
 * against file pointers being read out as those are handled in the
 * externalize code.
 */

#define REVOKE_GC_MAXFILES	32

struct unp_revoke_gc_info {
	struct file *fx;
	struct file *fary[REVOKE_GC_MAXFILES];
	int fcount;
};

void
unp_revoke_gc(struct file *fx)
{
	struct unp_revoke_gc_info info;
	int i;

	lwkt_gettoken(&unp_token);
	info.fx = fx;
	do {
		info.fcount = 0;
		allfiles_scan_exclusive(unp_revoke_gc_check, &info);
		for (i = 0; i < info.fcount; ++i)
			unp_fp_externalize(NULL, info.fary[i], -1);
	} while (info.fcount == REVOKE_GC_MAXFILES);
	lwkt_reltoken(&unp_token);
}

/*
 * Check for and replace revoked descriptors.
 *
 * WARNING: This routine is not allowed to block.
 */
static int
unp_revoke_gc_check(struct file *fps, void *vinfo)
{
	struct unp_revoke_gc_info *info = vinfo;
	struct file *fp;
	struct socket *so;
	struct mbuf *m0;
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	/*
	 * Is this a unix domain socket with rights-passing abilities?
	 */
	if (fps->f_type != DTYPE_SOCKET)
		return (0);
	if ((so = (struct socket *)fps->f_data) == NULL)
		return(0);
	if (so->so_proto->pr_domain != &localdomain)
		return(0);
	if ((so->so_proto->pr_flags & PR_RIGHTS) == 0)
		return(0);

	/*
	 * Scan the mbufs for control messages and replace any revoked
	 * descriptors we find.
	 */
	lwkt_gettoken(&so->so_rcv.ssb_token);
	m0 = so->so_rcv.ssb_mb;
	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;
			if (m->m_len < sizeof(*cm))
				continue;
			cm = mtod(m, struct cmsghdr *);
			if (cm->cmsg_level != SOL_SOCKET ||
			    cm->cmsg_type != SCM_RIGHTS) {
				continue;
			}
			qfds = (cm->cmsg_len - CMSG_LEN(0)) / sizeof(void *);
			rp = (struct file **)CMSG_DATA(cm);
			for (i = 0; i < qfds; i++) {
				fp = rp[i];
				if (fp->f_flag & FREVOKED) {
					kprintf("Warning: Removing revoked fp from unix domain socket queue\n");
					fhold(info->fx);
					info->fx->f_msgcount++;
					unp_rights++;
					rp[i] = info->fx;
					info->fary[info->fcount++] = fp;
				}
				if (info->fcount == REVOKE_GC_MAXFILES)
					break;
			}
			if (info->fcount == REVOKE_GC_MAXFILES)
				break;
		}
		m0 = m0->m_nextpkt;
		if (info->fcount == REVOKE_GC_MAXFILES)
			break;
	}
	lwkt_reltoken(&so->so_rcv.ssb_token);

	/*
	 * Stop the scan if we filled up our array.
	 */
	if (info->fcount == REVOKE_GC_MAXFILES)
		return(-1);
	return(0);
}

/*
 * Dispose of the fp's stored in a mbuf.
 *
 * The dds loop can cause additional fps to be entered onto the
 * list while it is running, flattening out the operation and avoiding
 * a deep kernel stack recursion.
 */
void
unp_dispose(struct mbuf *m)
{
	unp_defdiscard_t dds;

	lwkt_gettoken(&unp_token);
	++unp_defdiscard_nest;
	if (m) {
		unp_scan(m, unp_discard, NULL);
	}
	if (unp_defdiscard_nest == 1) {
		while ((dds = unp_defdiscard_base) != NULL) {
			unp_defdiscard_base = dds->next;
			closef(dds->fp, NULL);
			kfree(dds, M_UNPCB);
		}
	}
	--unp_defdiscard_nest;
	lwkt_reltoken(&unp_token);
}

static int
unp_listen(struct unpcb *unp, struct thread *td)
{
	struct proc *p = td->td_proc;

	KKASSERT(p);
	lwkt_gettoken(&unp_token);
	cru2x(p->p_ucred, &unp->unp_peercred);
	unp->unp_flags |= UNP_HAVEPCCACHED;
	lwkt_reltoken(&unp_token);
	return (0);
}

static void
unp_scan(struct mbuf *m0, void (*op)(struct file *, void *), void *data)
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_LEN(0)) /
				       sizeof(void *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++)
					(*op)(*rp++, data);
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Mark visibility.  info->defer is recalculated on every pass.
 */
static void
unp_mark(struct file *fp, void *data)
{
	struct unp_gc_info *info = data;

	if ((fp->f_flag & FMARK) == 0) {
		++info->defer;
		atomic_set_int(&fp->f_flag, FMARK | FDEFER);
	} else if (fp->f_flag & FDEFER) {
		++info->defer;
	}
}

/*
 * Discard a fp previously held in a unix domain socket mbuf.  To
 * avoid blowing out the kernel stack due to contrived chain-reactions
 * we may have to defer the operation to a higher procedural level.
 *
 * Caller holds unp_token
 */
static void
unp_discard(struct file *fp, void *data __unused)
{
	unp_defdiscard_t dds;

	spin_lock(&unp_spin);
	fp->f_msgcount--;
	unp_rights--;
	spin_unlock(&unp_spin);

	if (unp_defdiscard_nest) {
		dds = kmalloc(sizeof(*dds), M_UNPCB, M_WAITOK|M_ZERO);
		dds->fp = fp;
		dds->next = unp_defdiscard_base;
		unp_defdiscard_base = dds;
	} else {
		closef(fp, NULL);
	}
}