/*	$OpenBSD: uipc_usrreq.c,v 1.205 2024/05/02 17:10:55 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/refcnt.h>

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	D	unp_df_lock
 *	G	unp_gc_lock
 *	M	unp_ino_mtx
 *	R	unp_rights_mtx
 *	a	atomic
 *	s	socket lock
 */

struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};
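
/*
 * Illustrative sketch only (not compiled here): a deferral carrying n
 * descriptors is allocated in one shot, with the trailing flexible
 * array member sized by hand, the same pattern unp_discard() uses
 * below.
 *
 *	struct unp_deferral *defer;
 *
 *	defer = malloc(sizeof(*defer) + n * sizeof(struct fdpass),
 *	    M_TEMP, M_WAITOK);
 *	defer->ud_n = n;
 *	memcpy(&defer->ud_fp[0], rp, n * sizeof(struct fdpass));
 */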

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
static inline void unp_ref(struct unpcb *);
static inline void unp_rele(struct unpcb *);
struct socket *unp_solock_peer(struct socket *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [M] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

const struct pr_usrreqs uipc_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_accept	= uipc_accept,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_shutdown,
	.pru_rcvd	= uipc_rcvd,
	.pru_send	= uipc_send,
	.pru_abort	= uipc_abort,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

const struct pr_usrreqs uipc_dgram_usrreqs = {
	.pru_attach	= uipc_attach,
	.pru_detach	= uipc_detach,
	.pru_bind	= uipc_bind,
	.pru_listen	= uipc_listen,
	.pru_connect	= uipc_connect,
	.pru_disconnect	= uipc_disconnect,
	.pru_shutdown	= uipc_dgram_shutdown,
	.pru_send	= uipc_dgram_send,
	.pru_sense	= uipc_sense,
	.pru_sockaddr	= uipc_sockaddr,
	.pru_peeraddr	= uipc_peeraddr,
	.pru_connect2	= uipc_connect2,
};

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

static inline void
unp_ref(struct unpcb *unp)
{
	refcnt_take(&unp->unp_refcnt);
}

static inline void
unp_rele(struct unpcb *unp)
{
	refcnt_rele_wake(&unp->unp_refcnt);
}

struct socket *
unp_solock_peer(struct socket *so)
{
	struct unpcb *unp, *unp2;
	struct socket *so2;

	unp = so->so_pcb;

again:
	if ((unp2 = unp->unp_conn) == NULL)
		return NULL;

	so2 = unp2->unp_socket;

	if (so < so2)
		solock(so2);
	else if (so > so2) {
		unp_ref(unp2);
		sounlock(so);
		solock(so2);
		solock(so);

		/* Datagram socket could be reconnected due to re-lock. */
		if (unp->unp_conn != unp2) {
			sounlock(so2);
			unp_rele(unp2);
			goto again;
		}

		unp_rele(unp2);
	}

	return so2;
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};
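
/*
 * Userland sketch (assumes the usual OpenBSD MIB names consumed by
 * uipc_sysctl() below, e.g. net.unix.stream.sendspace): reading the
 * stream send buffer default from the tables above.
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/sysctl.h>
 *	#include <sys/unpcb.h>
 *	#include <err.h>
 *
 *	int mib[] = { CTL_NET, PF_UNIX, SOCK_STREAM, UNPCTL_SENDSPACE };
 *	u_int space;
 *	size_t len = sizeof(space);
 *
 *	if (sysctl(mib, 4, &space, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * On success `space' holds unpst_sendspace (PIPSIZ by default).
 */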

int
uipc_attach(struct socket *so, int proto, int wait)
{
	struct unpcb *unp;
	int error;

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, (wait == M_WAIT ? PR_WAITOK : PR_NOWAIT) |
	    PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	refcnt_init(&unp->unp_refcnt);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

int
uipc_bind(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `solock' because the fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_CREATE;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
uipc_listen(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode == NULL)
		return (EINVAL);
	return (0);
}

int
uipc_connect(struct socket *so, struct mbuf *nam)
{
	return unp_connect(so, nam, curproc);
}
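
/*
 * Userland sketch of the address format that uipc_bind() and
 * unp_connect() consume (validated by unp_nam2sun() at the bottom of
 * this file); the path and descriptor `s' are hypothetical.
 *
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *	#include <err.h>
 *	#include <string.h>
 *
 *	struct sockaddr_un sun;
 *
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_UNIX;
 *	strlcpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path));
 *	sun.sun_len = sizeof(sun);
 *	if (bind(s, (struct sockaddr *)&sun, sizeof(sun)) == -1)
 *		err(1, "bind");
 */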

int
uipc_accept(struct socket *so, struct mbuf *nam)
{
	struct socket *so2;
	struct unpcb *unp = sotounpcb(so);

	/*
	 * Pass back name of connected socket, if it was bound and
	 * we are still connected (our peer may have closed already!).
	 */
	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);

	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}

int
uipc_disconnect(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_disconnect(unp);
	return (0);
}

int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	socantsendmore(so);

	if (unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		socantrcvmore(so2);
	}

	return (0);
}

int
uipc_dgram_shutdown(struct socket *so)
{
	socantsendmore(so);
	return (0);
}

void
uipc_rcvd(struct socket *so)
{
	struct socket *so2;

	if ((so2 = unp_solock_peer(so)) == NULL)
		return;
	/*
	 * Adjust backpressure on sender
	 * and wake up anyone waiting to write.
	 */
	mtx_enter(&so->so_rcv.sb_mtx);
	so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
	so2->so_snd.sb_cc = so->so_rcv.sb_cc;
	mtx_leave(&so->so_rcv.sb_mtx);
	sowwakeup(so2);
	sounlock(so2);
}
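
/*
 * Worked example of the mirroring above (numbers arbitrary): if the
 * local `so_rcv' held 4096 bytes and the reader consumed 1024,
 * uipc_rcvd() copies sb_cc = 3072 back into the peer's `so_snd', so
 * sbspace() on the sending side grows by 1024 and a writer blocked in
 * sosend() can make progress once sowwakeup() runs.
 */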

int
uipc_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (so->so_snd.sb_state & SS_CANTSENDMORE) {
		error = EPIPE;
		goto dispose;
	}
	if (unp->unp_conn == NULL) {
		error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	/*
	 * Send to paired receive port, and then raise
	 * send buffer counts to maintain backpressure.
	 * Wake up readers.
	 */
	mtx_enter(&so2->so_rcv.sb_mtx);
	if (control) {
		if (sbappendcontrol(so2, &so2->so_rcv, m, control)) {
			control = NULL;
		} else {
			mtx_leave(&so2->so_rcv.sb_mtx);
			error = ENOBUFS;
			goto dispose;
		}
	} else if (so->so_type == SOCK_SEQPACKET)
		sbappendrecord(so2, &so2->so_rcv, m);
	else
		sbappend(so2, &so2->so_rcv, m);
	so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
	so->so_snd.sb_cc = so2->so_rcv.sb_cc;
	if (so2->so_rcv.sb_cc > 0)
		dowakeup = 1;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);

	m = NULL;

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

int
uipc_dgram_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	const struct sockaddr *from;
	int error = 0, dowakeup = 0;

	if (control) {
		sounlock(so);
		error = unp_internalize(control, curproc);
		solock(so);
		if (error)
			goto out;
	}

	if (nam) {
		if (unp->unp_conn) {
			error = EISCONN;
			goto dispose;
		}
		error = unp_connect(so, nam, curproc);
		if (error)
			goto dispose;
	}

	if (unp->unp_conn == NULL) {
		if (nam != NULL)
			error = ECONNREFUSED;
		else
			error = ENOTCONN;
		goto dispose;
	}

	so2 = unp->unp_conn->unp_socket;

	if (unp->unp_addr)
		from = mtod(unp->unp_addr, struct sockaddr *);
	else
		from = &sun_noname;

	mtx_enter(&so2->so_rcv.sb_mtx);
	if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
		dowakeup = 1;
		m = NULL;
		control = NULL;
	} else
		error = ENOBUFS;
	mtx_leave(&so2->so_rcv.sb_mtx);

	if (dowakeup)
		sorwakeup(so2);
	if (nam)
		unp_disconnect(unp);

dispose:
	/* we need to undo unp_internalize in case of errors */
	if (control && error)
		unp_dispose(control);

out:
	m_freem(control);
	m_freem(m);

	return (error);
}

void
uipc_abort(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	unp_detach(unp);
	sofree(so, 0);
}

int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);

	sb->st_blksize = so->so_snd.sb_hiwat;
	sb->st_dev = NODEV;
	mtx_enter(&unp_ino_mtx);
	if (unp->unp_ino == 0)
		unp->unp_ino = unp_ino++;
	mtx_leave(&unp_ino_mtx);
	sb->st_atim.tv_sec =
	    sb->st_mtim.tv_sec =
	    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
	sb->st_atim.tv_nsec =
	    sb->st_mtim.tv_nsec =
	    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
	sb->st_ino = unp->unp_ino;

	return (0);
}

int
uipc_sockaddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);

	uipc_setaddr(unp, nam);
	return (0);
}

int
uipc_peeraddr(struct socket *so, struct mbuf *nam)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	so2 = unp_solock_peer(so);
	uipc_setaddr(unp->unp_conn, nam);
	if (so2 != NULL && so2 != so)
		sounlock(so2);
	return (0);
}
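
/*
 * Userland sketch: uipc_connect2() below is the path taken when two
 * fresh sockets are wired together directly, as socketpair(2) does.
 *
 *	#include <sys/socket.h>
 *	#include <err.h>
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1)
 *		err(1, "socketpair");
 *
 * Both ends come back connected, with `unp_connid' filled in from the
 * calling process as done below.
 */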

int
uipc_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so), *unp2;
	int error;

	if ((error = unp_connect2(so, so2)))
		return (error);

	unp->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp->unp_connid.pid = curproc->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDS;
	unp2 = sotounpcb(so2);
	unp2->unp_connid.uid = curproc->p_ucred->cr_uid;
	unp2->unp_connid.gid = curproc->p_ucred->cr_gid;
	unp2->unp_connid.pid = curproc->p_p->ps_pid;
	unp2->unp_flags |= UNP_FEIDS;

	return (0);
}

int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;
	struct unpcb *unp2;

	unp->unp_vnode = NULL;

	/*
	 * Enforce `i_lock' -> `solock()' lock order.
	 */
	sounlock(so);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn != NULL) {
		/*
		 * Datagram socket could be connected to itself.
		 * Such socket will be disconnected here.
		 */
		unp_disconnect(unp);
	}

	while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) {
		struct socket *so2 = unp2->unp_socket;

		if (so < so2)
			solock(so2);
		else {
			unp_ref(unp2);
			sounlock(so);
			solock(so2);
			solock(so);

			if (unp2->unp_conn != unp) {
				/* `unp2' was disconnected due to re-lock. */
				sounlock(so2);
				unp_rele(unp2);
				continue;
			}

			unp_rele(unp2);
		}

		unp2->unp_conn = NULL;
		SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref);
		so2->so_error = ECONNRESET;
		so2->so_state &= ~SS_ISCONNECTED;

		sounlock(so2);
	}

	sounlock(so);
	refcnt_finalize(&unp->unp_refcnt, "unpfinal");
	solock(so);

	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;
	nd.ni_unveil = UNVEIL_WRITE;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `solock' because the fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		solock(so2);

		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0, M_WAIT)) == NULL) {
			error = ECONNREFUSED;
		}

		sounlock(so2);

		if (error != 0)
			goto put;

		/*
		 * Since `so2' is protected by vnode(9) lock, `so3'
		 * can't be PRU_ABORT'ed here.
		 */
		solock_pair(so, so3);

		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);

		/*
		 * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag
		 * are immutable since we set them in uipc_bind().
		 */
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;

		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}

		so2 = so3;
	} else {
		if (so2 != so)
			solock_pair(so, so2);
		else
			solock(so);
	}

	error = unp_connect2(so, so2);

	sounlock(so);

	/*
	 * `so2' can't be PRU_ABORT'ed concurrently.
	 */
	if (so2 != so)
		sounlock(so2);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by a concurrent thread
	 * while `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	soassertlocked(so);
	soassertlocked(so2);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct socket *so2;
	struct unpcb *unp2;

	if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL)
		return;

	unp2 = unp->unp_conn;
	unp->unp_conn = NULL;

	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}

	if (so2 != unp->unp_socket)
		sounlock(so2);
}

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets.  Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient is able to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Keep `fdp' locked to prevent concurrent close() of just
	 * inserted descriptors.  Such descriptors could have the only
	 * `f_count' reference which is now shared between control
	 * message and `fdp'.
	 */

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required.  We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}
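
/*
 * Userland sketch of the receiving side served by unp_externalize():
 * the kernel rewrites the internal struct fdpass array into plain ints
 * before copyout, so the recipient sees ordinary descriptors.  The
 * socket `s' is a hypothetical connected AF_UNIX descriptor.
 *
 *	#include <sys/socket.h>
 *	#include <err.h>
 *	#include <string.h>
 *
 *	union {
 *		struct cmsghdr hdr;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmp;
 *	int fd = -1;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	if (recvmsg(s, &msg, 0) == -1)
 *		err(1, "recvmsg");
 *	cmp = CMSG_FIRSTHDR(&msg);
 *	if (cmp != NULL && cmp->cmsg_level == SOL_SOCKET &&
 *	    cmp->cmsg_type == SCM_RIGHTS)
 *		memcpy(&fd, CMSG_DATA(cmp), sizeof(fd));
 */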

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;	/* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
#if NKCOV > 0
		/* kcov descriptors cannot be copied */
		if (fp->f_type == DTYPE_VNODE && kcov_vnode(fp->f_data)) {
			error = EINVAL;
			goto fail;
		}
#endif
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}
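
/*
 * Userland sketch of the sending side served by unp_internalize():
 * each int in the SCM_RIGHTS payload is resolved to a struct file
 * pointer and accounted against `unp_rights' above.  `s' and
 * `fd_to_pass' are hypothetical descriptors.
 *
 *	#include <sys/socket.h>
 *	#include <err.h>
 *	#include <string.h>
 *
 *	union {
 *		struct cmsghdr hdr;
 *		char buf[CMSG_SPACE(sizeof(int))];
 *	} cmsgbuf;
 *	struct msghdr msg;
 *	struct cmsghdr *cmp;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = cmsgbuf.buf;
 *	msg.msg_controllen = sizeof(cmsgbuf.buf);
 *	cmp = CMSG_FIRSTHDR(&msg);
 *	cmp->cmsg_len = CMSG_LEN(sizeof(int));
 *	cmp->cmsg_level = SOL_SOCKET;
 *	cmp->cmsg_type = SCM_RIGHTS;
 *	memcpy(CMSG_DATA(cmp), &fd_to_pass, sizeof(int));
 *	if (sendmsg(s, &msg, 0) == -1)
 *		err(1, "sendmsg");
 */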

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead.  Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead.  Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		mtx_enter(&so->so_rcv.sb_mtx);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		mtx_leave(&so->so_rcv.sb_mtx);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced.  Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer.  Repeat this until no
	 * new alive sockets are found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			mtx_enter(&so->so_rcv.sb_mtx);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			mtx_leave(&so->so_rcv.sb_mtx);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then for each of them
	 * dispose of the files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				struct sockbuf *sb = &unp->unp_socket->so_rcv;
				struct mbuf *m;

				/*
				 * This socket could still be connected
				 * and if so its `so_rcv' is still
				 * accessible by a concurrent PRU_SEND
				 * thread.
				 */

				mtx_enter(&sb->sb_mtx);
				m = sb->sb_mb;
				memset(&sb->sb_startzero, 0,
				    (caddr_t)&sb->sb_endzero -
				    (caddr_t)&sb->sb_startzero);
				sb->sb_timeo_nsecs = INFSLP;
				mtx_leave(&sb->sb_mtx);

				unp_scan(m, unp_discard);
				m_purge(m);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}
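
/*
 * Illustrative scenario for the collector above (a userland sketch):
 * a socket sent over itself and then closed forms a reference cycle
 * that only unp_gc() can reclaim.
 *
 *	int sv[2];
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	... send sv[0] over sv[0] with SCM_RIGHTS, as sketched above ...
 *	close(sv[0]);
 *	close(sv[1]);
 *
 * After the close(2) calls the in-flight file's `f_count' equals its
 * `unp_msgcount', so the first pass marks it UNP_GCDEAD and its buffer
 * is eventually flushed through unp_discard().
 */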

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}