1 /* $OpenBSD: uipc_usrreq.c,v 1.150 2021/10/21 22:11:07 mvs Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/filedesc.h> 39 #include <sys/domain.h> 40 #include <sys/protosw.h> 41 #include <sys/queue.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/unpcb.h> 45 #include <sys/un.h> 46 #include <sys/namei.h> 47 #include <sys/vnode.h> 48 #include <sys/file.h> 49 #include <sys/stat.h> 50 #include <sys/mbuf.h> 51 #include <sys/task.h> 52 #include <sys/pledge.h> 53 #include <sys/pool.h> 54 #include <sys/rwlock.h> 55 #include <sys/sysctl.h> 56 57 /* 58 * Locks used to protect global data and struct members: 59 * I immutable after creation 60 * U unp_lock 61 */ 62 struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); 63 64 /* 65 * Stack of sets of files that were passed over a socket but were 66 * not received and need to be closed. 67 */ 68 struct unp_deferral { 69 SLIST_ENTRY(unp_deferral) ud_link; /* [U] */ 70 int ud_n; /* [I] */ 71 /* followed by ud_n struct fdpass */ 72 struct fdpass ud_fp[]; /* [I] */ 73 }; 74 75 void uipc_setaddr(const struct unpcb *, struct mbuf *); 76 void unp_discard(struct fdpass *, int); 77 void unp_mark(struct fdpass *, int); 78 void unp_scan(struct mbuf *, void (*)(struct fdpass *, int)); 79 int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *); 80 81 struct pool unpcb_pool; 82 struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL); 83 84 /* 85 * Unix communications domain. 
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */

/* Placeholder address returned for unbound sockets. */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [U] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [U] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [U] file descriptors in flight */
int	unp_defer;	/* [U] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [U] GC task currently running */

/*
 * One-time initialization of the UNIX domain: set up the pcb pool.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

/*
 * Copy the bound address of `unp' into the caller-supplied mbuf `nam';
 * if the pcb is absent or unbound, hand back sun_noname instead.
 * NOTE(review): assumes `nam' has room for a struct sockaddr_un —
 * callers pass a fresh MT_SONAME mbuf; confirm against callers.
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Protocol user-request dispatcher for AF_UNIX sockets.
 *
 * Called with the socket lock held (it is dropped and re-taken around
 * unp_internalize(), which sleeps).  `m' carries the data to send (or,
 * for PRU_SENSE, a struct stat pointer disguised as an mbuf pointer),
 * `nam' the peer address (or the second socket for PRU_CONNECT2), and
 * `control' any SCM_RIGHTS control data.  Returns 0 or an errno.
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Only PRU_SEND may carry control data. */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listen(2) only makes sense on a bound socket. */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): `nam' is really the second socket. */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* Stamp both ends with the creator's credentials. */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			/* datagram sockets never generate PRU_RCVD */
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control) {
			/* unp_internalize() sleeps; drop the socket lock. */
			sounlock(so, SL_LOCKED);
			error = unp_internalize(control, p);
			solock(so);
			if (error)
				break;
		}
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* sendto(2) with an explicit destination. */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* ownership passed to the receive buffer */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			/* undo the temporary connect done above */
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		/* fstat(2): `m' actually points at the stat buffer. */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* hand out a fake inode number on first use */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* no out-of-band support (see TODO above) */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/*
	 * For these requests `m' is not a data mbuf (PRU_SENSE passes a
	 * stat buffer) or was never ours to free.
	 */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.
Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

/* sysctl bounds tables, one per socket type (see uipc_sysctl()) */
const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

/*
 * Allocate and attach a pcb to a newly created AF_UNIX socket,
 * reserving buffer space according to the socket type.
 * Called with unp_lock held.  Returns 0 or an errno.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	/* reserve buffers unless the caller already did (sonewconn) */
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);
	/* make the socket visible to the garbage collector */
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}

/*
 * Detach hook: tear down the pcb attached to `so', if any.
 */
int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

/*
 * Handle net.unix sysctl requests: per-type send/receive space tunables
 * plus the read-only in-flight and deferred descriptor counters.
 */
int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Fully detach `unp' from its socket: unlink it from the global list,
 * sever the vnode binding and any connections, free the pcb, and kick
 * the GC task if descriptors are still in flight.
 * Called with unp_lock held; temporarily drops the socket lock to
 * release the vnode.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = NULL;

	rw_assert_wrlock(&unp_lock);

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/*
		 * `v_socket' is only read in unp_connect and
		 * unplock prevents concurrent access.
		 */

		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
	}

	if (unp->unp_conn)
		unp_disconnect(unp);
	/* reset every datagram peer still pointing at us */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);

	if (vp != NULL) {
		/*
		 * Enforce `i_lock' -> `unplock' because fifo subsystem
		 * requires it. The socket can't be closed concurrently
		 * because the file descriptor reference is
		 * still held.
		 */

		sounlock(so, SL_LOCKED);
		KERNEL_LOCK();
		vrele(vp);
		KERNEL_UNLOCK();
		solock(so);
	}
}

/*
 * bind(2) on an AF_UNIX socket: validate the sockaddr_un in `nam',
 * create the filesystem node via namei()/VOP_CREATE(), and record the
 * bound address and binder's credentials in the pcb.
 * The socket lock is dropped across the (sleeping) filesystem work;
 * UNP_BINDING keeps concurrent bind/connect attempts out meanwhile.
 * Returns 0 or an errno (EADDRINUSE if the path already exists).
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	/* keep a private, fixed-size copy of the address */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* path already exists: bind must fail */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	/* link vnode and socket both ways */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

/*
 * connect(2) on an AF_UNIX socket: look up the path in `nam', check
 * that it is a writable socket vnode, and connect to the socket bound
 * there.  For connection-oriented types this spawns a server-side
 * socket via sonewconn() and stamps it with our credentials.
 * The socket lock is dropped across namei(); UNP_CONNECTING keeps
 * concurrent bind/connect attempts out meanwhile.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* nothing is currently bound to this path */
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* stream/seqpacket: peer must be listening */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* server side learns the connecting process's identity */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* connect to the freshly spawned server socket */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			/* client learns the listener's bind-time identity */
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	return (error);
}

/*
 * Link two sockets of the same type together.  Datagram sockets get a
 * one-way link (tracked through unp_refs for teardown); stream and
 * seqpacket sockets are linked symmetrically and both marked connected.
 * Called with unp_lock held.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): unlink `unp' from its peer and mark the
 * affected sockets disconnected, clearing any borrowed send-buffer
 * accounting on stream/seqpacket pairs.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

/*
 * shutdown(2) propagation: tell a connected stream/seqpacket peer it
 * will receive no more data.  A no-op for datagram sockets.
 */
void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

/*
 * Abort a connection with error `errno'.  If the socket is still on a
 * listen queue (so_head set), free it and its pcb here, since it was
 * never handed out by accept(2).  Called with unp_lock held.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq() releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		m_freem(unp->unp_addr);
		pool_put(&unpcb_pool, unp);
	}
}

#ifdef notdef
unp_drain(void)
{

}
#endif

/*
 * Map a file to its unpcb, or NULL if it is not an AF_UNIX socket.
 */
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

/*
 * Deliver SCM_RIGHTS to the receiving process: convert the struct
 * fdpass array in `rights' into freshly allocated file descriptors in
 * the current process, after pledge and chroot visibility checks.
 * `controllen' is the space the receiver provided; `flags' may carry
 * MSG_CMSG_CLOEXEC.  On failure all passed files are discarded.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto out;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices. If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	if (error)
		goto out;

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

	fdplock(fdp);
restart:
	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				goto restart;
			}

			fdpunlock(fdp);

			/*
			 * This is the error that has historically
			 * been returned, and some callers may
			 * expect it.
			 */

			error = EMSGSIZE;
			goto out;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}
	fdpunlock(fdp);

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);

	rw_enter_write(&unp_lock);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}
	rw_exit_write(&unp_lock);

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));

	if (error) {
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			rw_enter_write(&unp_lock);
			unp_discard(rp, nfds);
			rw_exit_write(&unp_lock);
		}
	}

	return (error);
}

/*
 * Prepare SCM_RIGHTS from the sending process: convert the int file
 * descriptors in `control' into an array of struct fdpass holding
 * referenced struct file pointers, growing the mbuf to a cluster if
 * needed, and charge them against the in-flight limit.
 * Called without the socket lock (may sleep).  Returns 0 or an errno.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* cap the total number of descriptors in flight system-wide */
	rw_enter_write(&unp_lock);
	if (unp_rights + nfds > maxfiles / 10) {
		rw_exit_write(&unp_lock);
		return (EMFILE);
	}
	unp_rights += nfds;
	rw_exit_write(&unp_lock);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT) {
			error = E2BIG;
			goto nospace;
		}

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			error = ENOBUFS;       /* allocation failed */
			goto nospace;
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk backwards: fdpass entries are wider than ints, so the
	 * in-place conversion must run from the end to avoid
	 * overwriting unread descriptors.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	rw_enter_write(&unp_lock);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
	}
	rw_exit_write(&unp_lock);
	fdpunlock(fdp);
	return (0);
fail:
	rw_exit_write(&unp_lock);
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		rw_enter_write(&unp_lock);
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		rw_exit_write(&unp_lock);
		FRELE(fp, p);
	}

nospace:
	rw_enter_write(&unp_lock);
	unp_rights -= nfds;
	rw_exit_write(&unp_lock);

	return (error);
}

/*
 * Garbage collector for descriptors in flight.  First closes any files
 * queued on unp_deferred, then runs a mark-and-sweep over all AF_UNIX
 * sockets to find cycles of sockets that are only kept alive by being
 * passed to each other, and discards the files buffered in those
 * sockets' receive queues.  Runs as a task; serialized via unp_gcing.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_lock);

	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			/* closef() expects a refcount of 2 */
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			/* closef() may sleep; drop unp_lock around it */
			rw_exit_write(&unp_lock);
			(void) closef(fp, NULL);
			rw_enter_write(&unp_lock);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	/* mark phase: iterate until no new deferred sockets appear */
	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_lock);
}

/*
 * Discard all files referenced by SCM_RIGHTS control data in `m',
 * e.g. when a message could not be delivered.
 */
void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Walk an mbuf chain (and its packet chain) looking for SCM_RIGHTS
 * control mbufs and apply `op' to each embedded fdpass array.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * GC mark helper: flag every AF_UNIX socket in the fdpass array as
 * deferred (live, pending a scan of its own receive buffer).
 * Called with unp_lock held via unp_scan().
 */
void
unp_mark(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;

		unp = fptounp(rp[i].fp);
		if (unp == NULL)
			continue;

		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_flags |= UNP_GCDEFER;
		unp->unp_flags &= ~UNP_GCDEAD;
	}
}

/*
 * Queue a set of passed files for closing by the GC task; the file
 * pointers are moved out of the message (zeroed in place) so they
 * cannot be seen again.  Called with unp_lock held.
 */
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	rw_assert_wrlock(&unp_lock);

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);

	task_add(systqmp, &unp_gc_task);
}

/*
 * Validate the sockaddr in `nam' as a struct sockaddr_un and return it
 * via `sun', with sun_path guaranteed NUL-terminated (extending the
 * mbuf by one byte if needed).  Optionally returns the path length via
 * `pathlen'.  Returns 0 or an errno.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* no terminator present: append one */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}