1 /* $OpenBSD: uipc_usrreq.c,v 1.160 2021/12/26 23:41:41 mvs Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/lock.h>

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	D	unp_df_lock
 *	G	unp_gc_lock
 *	U	unp_lock
 *	R	unp_rights_mtx
 *	a	atomic
 */

struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk");
struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk");

struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.  Entries are pushed by
 * unp_discard() and drained by the unp_gc() task.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [D] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] flexible array */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_remove_gcrefs(struct fdpass *, int);
void	unp_restore_gcrefs(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

struct pool unpcb_pool;
/* Garbage-collection task for descriptors stuck in flight; see unp_gc(). */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [G] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
	LIST_HEAD_INITIALIZER(unp_head);
/* [D] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
	SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [R] file descriptors in flight */
int	unp_defer;	/* [G] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [G] GC task currently running */

/* One-time setup: initialize the pool backing all unpcb allocations. */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

/*
 * Copy the bound address of `unp' into the caller-supplied mbuf `nam'.
 * Falls back to the canonical unnamed AF_UNIX address when the socket
 * is unbound or `unp' is NULL (e.g. the peer already closed).
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Protocol user-request switch for the AF_UNIX domain.  Dispatches on
 * `req' (PRU_*).  Called with the socket locked; `m'/`control' are
 * consumed (freed at `release') except for the receive-side requests
 * excluded at the bottom.
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* only PRU_SEND may carry control data (SCM_RIGHTS) */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listen(2) is only valid on a bound (filesystem) socket */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): `nam' is actually the second socket */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* record our credentials on both endpoints */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			/* datagram sockets never generate PRU_RCVD */
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			/* mirror our receive-buffer usage into the peer's
			 * send buffer so its space accounting tracks what
			 * we have consumed */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control) {
			/* unp_internalize() may sleep; drop the socket lock */
			sounlock(so, SL_LOCKED);
			error = unp_internalize(control, p);
			solock(so);
			if (error)
				break;
		}
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* sendto(2) with an explicit destination */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* ownership transferred to the receive buffer */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				/* undo the temporary sendto(2) connection */
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			/* mirror peer receive-buffer usage for backpressure */
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_detach(unp);
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq() releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		break;

	case PRU_SENSE: {
		/* fstat(2) support: `m' carries a struct stat to fill in */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* lazily assign a fake inode number on first stat */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* no out-of-band data on UNIX domain sockets */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/* these requests borrow `m'/`nam' rather than consuming them */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

/* sysctl bounds tables, one per socket type; see uipc_sysctl() */
const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

/*
 * Attach a new unpcb to `so': reserve buffer space by socket type,
 * allocate the PCB and link it onto the global GC list.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 */
	/*
	 * We also release the lock on listening socket and on our peer
	 * socket when called from unp_connect().  This is safe.  The
	 * listening socket protected by vnode(9) lock.  The peer socket
	 * has 'UNP_CONNECTING' flag set.
	 */
	sounlock(so, SL_LOCKED);
	rw_enter_write(&unp_gc_lock);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	rw_exit_write(&unp_gc_lock);
	solock(so);
	return (0);
}

/* Detach hook: tear down the PCB attached to `so'. */
int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

/*
 * net.unix.* sysctl handler: per-type send/recv space knobs plus the
 * read-only in-flight and deferred descriptor counters.
 */
int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Destroy `unp': unlink from the GC list, drop the bound vnode (if any),
 * disconnect from peer and all datagram senders, and free the PCB.
 * Kicks the GC task if descriptors are still in flight.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = unp->unp_vnode;

	rw_assert_wrlock(&unp_lock);

	unp->unp_vnode = NULL;

	/*
	 * Enforce `unp_gc_lock' -> `solock()' lock order.
	 * Enforce `i_lock' -> `unp_lock' lock order.
	 */
	sounlock(so, SL_LOCKED);

	rw_enter_write(&unp_gc_lock);
	LIST_REMOVE(unp, unp_link);
	rw_exit_write(&unp_gc_lock);

	if (vp != NULL) {
		VOP_LOCK(vp, LK_EXCLUSIVE);
		vp->v_socket = NULL;

		KERNEL_LOCK();
		vput(vp);
		KERNEL_UNLOCK();
	}

	solock(so);

	if (unp->unp_conn)
		unp_disconnect(unp);
	/* reset every datagram sender still connected to us */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);
}

/*
 * bind(2): create a VSOCK vnode at the path in `nam' and attach it to
 * the socket.  UNP_BINDING excludes concurrent bind/connect attempts
 * while the socket lock is dropped for the namei() lookup.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	/* keep a private, fixed-size copy of the address */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len.
	 */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* the path already exists: bind(2) must fail */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* remember binder's credentials for getpeereid(2)-style queries */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

/*
 * connect(2): look up the filesystem node named by `nam', locate the
 * socket bound to it and connect to it (via sonewconn() for
 * connection-oriented types).  UNP_CONNECTING excludes concurrent
 * bind/connect while the socket lock is dropped.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.
	 * The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* node exists but no socket is bound to it (anymore) */
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* stream/seqpacket: spawn a server-side socket to pair with */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == NULL) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* give the server side the connecting process' credentials */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
		/* and give us the listener's bind-time credentials */
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	/*
	 * The peer socket could be closed by concurrent thread
	 * when `so' and `vp' are unlocked.
	 */
	if (error == 0 && unp->unp_conn == NULL)
		error = ECONNREFUSED;

	return (error);
}

/*
 * Link `so' to `so2'.  Datagram sockets get a one-way reference
 * (tracked on the peer's unp_refs list); stream/seqpacket sockets are
 * cross-linked and both marked connected.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): unlink from the peer and reset the mirrored
 * send-buffer accounting on both sides for connection-oriented types.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

/*
 * shutdown(2) write-side: tell the connected peer (stream/seqpacket
 * only) that no more data will arrive.
 */
void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

/* Report `errno' on the socket and disconnect it from its peer. */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
unp_drain(void)
{

}
#endif

/*
 * Map a struct file back to its unpcb, or NULL if `fp' is not a
 * UNIX-domain socket.
 */
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

/*
 * Deliver received SCM_RIGHTS to the recipient: convert the in-kernel
 * struct fdpass array in `rights' into file descriptor numbers in the
 * recipient's fd table.  On error all passed files are discarded via
 * unp_discard().
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets.  Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* recipient's buffer must have room for nfds ints */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				/* grow the fd table and retry from scratch */
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}
	fdpunlock(fdp);

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		/* the file is no longer "in flight" on a socket */
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
	}

	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
 out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			/*
			 * No lock required. We are the only `cm' holder.
			 */
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
	}

	return (error);
}

/*
 * Prepare SCM_RIGHTS for transmission: convert the sender's file
 * descriptor numbers in `control' into referenced struct fdpass
 * entries (growing the mbuf to a cluster if needed) and account them
 * as in flight.  Undone by unp_externalize() or unp_dispose().
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* cap the system-wide number of descriptors in flight */
	mtx_enter(&unp_rights_mtx);
	if (unp_rights + nfds > maxfiles / 10) {
		mtx_leave(&unp_rights_mtx);
		return (EMFILE);
	}
	unp_rights += nfds;
	mtx_leave(&unp_rights_mtx);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used.
	 */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk backwards: struct fdpass is larger than int, so the
	 * in-place expansion must not overwrite fds not yet read.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		/* count the UNIX socket itself as in flight for the GC */
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount++;
			unp->unp_file = fp;
			rw_exit_write(&unp_gc_lock);
		}
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	/* `fp' is NULL here only when fd_getfile() failed */
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did.
	 */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL) {
			rw_enter_write(&unp_gc_lock);
			unp->unp_msgcount--;
			rw_exit_write(&unp_gc_lock);
		}
		FRELE(fp, p);
	}

nospace:
	mtx_enter(&unp_rights_mtx);
	unp_rights -= nfds;
	mtx_leave(&unp_rights_mtx);

	return (error);
}

/*
 * Garbage collector for descriptors in flight: closes deferred file
 * sets, then runs a mark-and-sweep over all UNIX sockets to find
 * reference cycles (sockets only reachable through unreceived
 * SCM_RIGHTS messages) and discards their in-flight contents.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_gc_lock);
	/* only one GC pass at a time */
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;
	rw_exit_write(&unp_gc_lock);

	rw_enter_write(&unp_df_lock);
	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		/* drop the list lock: closef() may sleep */
		rw_exit_write(&unp_df_lock);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			if ((unp = fptounp(fp)) != NULL) {
				rw_enter_write(&unp_gc_lock);
				unp->unp_msgcount--;
				rw_exit_write(&unp_gc_lock);
			}
			mtx_enter(&unp_rights_mtx);
			unp_rights--;
			mtx_leave(&unp_rights_mtx);
			/* closef() expects a refcount of 2 */
			FREF(fp);
			(void) closef(fp, NULL);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
		rw_enter_write(&unp_df_lock);
	}
	rw_exit_write(&unp_df_lock);

	nunref = 0;

	rw_enter_write(&unp_gc_lock);

	/*
	 * Determine sockets which may be prospectively dead. Such
	 * sockets have their `unp_msgcount' equal to the `f_count'.
	 * If `unp_msgcount' is 0, the socket has not been passed
	 * and can't be unreferenced.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		unp->unp_gcflags = 0;

		if (unp->unp_msgcount == 0)
			continue;
		if ((fp = unp->unp_file) == NULL)
			continue;
		if (fp->f_count == unp->unp_msgcount) {
			/* all references are in flight: candidate for GC */
			unp->unp_gcflags |= UNP_GCDEAD;
			unp->unp_gcrefs = unp->unp_msgcount;
			nunref++;
		}
	}

	/*
	 * Scan all sockets previously marked as dead. Remove
	 * the `unp_gcrefs' reference each socket holds on any
	 * dead socket in its buffer.
	 */
	LIST_FOREACH(unp, &unp_head, unp_link) {
		if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
			continue;
		so = unp->unp_socket;
		solock(so);
		unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
		sounlock(so, SL_LOCKED);
	}

	/*
	 * If the dead socket has `unp_gcrefs' reference counter
	 * greater than 0, it can't be unreferenced. Mark it as
	 * alive and increment the `unp_gcrefs' reference for each
	 * dead socket within its buffer. Repeat this until we
	 * have no new alive sockets found.
	 */
	do {
		unp_defer = 0;

		LIST_FOREACH(unp, &unp_head, unp_link) {
			if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
				continue;
			if (unp->unp_gcrefs == 0)
				continue;

			/* externally referenced: this socket is alive */
			unp->unp_gcflags &= ~UNP_GCDEAD;

			so = unp->unp_socket;
			solock(so);
			unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
			sounlock(so, SL_LOCKED);

			KASSERT(nunref > 0);
			nunref--;
		}
	} while (unp_defer > 0);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_gcflags & UNP_GCDEAD) {
				/*
				 * This socket could still be connected
				 * and if so it's `so_rcv' is still
				 * accessible by concurrent PRU_SEND
				 * thread.
				 */
				so = unp->unp_socket;
				solock(so);
				unp_scan(so->so_rcv.sb_mb, unp_discard);
				sounlock(so, SL_LOCKED);
			}
		}
	}

	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_gc_lock);
}

/*
 * Release all SCM_RIGHTS files held in the mbuf chain `m' (e.g. an
 * unreceived control message being dropped).
 */
void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Walk every record of an mbuf chain, locate the first SCM_RIGHTS
 * control mbuf in each record and apply `op' to its fdpass array.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * Defer closing a set of passed files to the GC task: closef() may
 * sleep and recurse into socket code, so the fdpass entries are moved
 * (and zeroed in place) onto the unp_deferred list instead.
 */
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);

	rw_enter_write(&unp_df_lock);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
	rw_exit_write(&unp_df_lock);

	task_add(systqmp, &unp_gc_task);
}

/*
 * unp_scan() callback, mark phase: drop one `unp_gcrefs' count from
 * each dead socket found inside another socket's receive buffer.
 */
void
unp_remove_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			KASSERT(unp->unp_gcrefs > 0);
			unp->unp_gcrefs--;
		}
	}
}

/*
 * unp_scan() callback, un-mark phase: restore one `unp_gcrefs' count
 * on each dead socket referenced from a live socket's buffer, and bump
 * `unp_defer' so unp_gc() repeats its pass.
 */
void
unp_restore_gcrefs(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_gc_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;
		if ((unp = fptounp(rp[i].fp)) == NULL)
			continue;
		if (unp->unp_gcflags & UNP_GCDEAD) {
			unp->unp_gcrefs++;
			unp_defer++;
		}
	}
}

/*
 * Validate the sockaddr carried in `nam' and return it as a
 * sockaddr_un via `sun'.  Guarantees sun_path is NUL terminated
 * (extending the mbuf by one byte if necessary) and optionally
 * reports the path length via `pathlen'.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* no NUL found: append one, growing the address by a byte */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}