1 /* $OpenBSD: uipc_usrreq.c,v 1.148 2021/05/25 22:45:09 bluhm Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>
#include <sys/sysctl.h>

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	U	unp_lock
 */
struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral) ud_link;	/* [U] */
	int ud_n;				/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass ud_fp[];			/* [I] flexible array of passed files */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

/* Allocation pool for struct unpcb; initialized in unp_init(). */
struct pool unpcb_pool;
/* Garbage-collection task for file descriptors lost in flight; see unp_gc(). */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [U] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb) unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [U] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral) unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [U] file descriptors in flight */
int	unp_defer;	/* [U] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [U] GC task currently running */

/*
 * Initialize the unpcb allocation pool.  Called once when the UNIX
 * domain is attached.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

/*
 * Copy the bound address of `unp' into the caller-supplied mbuf `nam'.
 * Falls back to sun_noname when there is no pcb or it was never bound.
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Protocol user-request handler for AF_UNIX sockets: dispatches the
 * PRU_* request to the matching operation.  Called with the socket
 * locked (unp_lock held for this protocol).
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* control data is only meaningful for PRU_SEND */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listening requires a prior successful bind to a vnode */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2) path: `nam' carries the second socket */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* record peer credentials on both endpoints */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* move fds in SCM_RIGHTS into fdpass form before queuing */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* sendto(2) with an address: temporary connect */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* ownership passed to the receive buffer */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			/* mirror the peer's receive buffer usage onto our send side */
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		/* fstat(2) on a socket: `m' actually points at a struct stat */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* lazily assign a fake inode number, unique per socket */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* no out-of-band support on AF_UNIX sockets */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/* these requests reuse m/nam buffers owned by the caller */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.
 Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	8192
u_int	unpst_sendspace = PIPSIZ;
u_int	unpst_recvspace = PIPSIZ;
u_int	unpsq_sendspace = PIPSIZ;
u_int	unpsq_recvspace = PIPSIZ;
u_int	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_int	unpdg_recvspace = 16*1024;

/* sysctl bounds for net.unix.{stream,seqpacket,dgram}.{recv,send}space */
const struct sysctl_bounded_args unpstctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpst_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpst_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpsqctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpsq_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpsq_sendspace, 0, SB_MAX },
};
const struct sysctl_bounded_args unpdgctl_vars[] = {
	{ UNPCTL_RECVSPACE, &unpdg_recvspace, 0, SB_MAX },
	{ UNPCTL_SENDSPACE, &unpdg_sendspace, 0, SB_MAX },
};

/*
 * Create and attach a new unpcb to socket `so': reserve buffer space
 * appropriate for the socket type and link the pcb onto the global
 * unp_head list.  Called with unp_lock write-locked.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_SEQPACKET:
			error = soreserve(so, unpsq_sendspace, unpsq_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}

/*
 * Detach the pcb from socket `so'.  Thin wrapper around unp_detach()
 * that tolerates a missing pcb.
 */
int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

/*
 * Handle the net.unix sysctl subtree: per-type buffer-space knobs plus
 * the read-only inflight/deferred descriptor counters.
 */
int
uipc_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int *valp = &unp_defer;

	/* All sysctl names at this level are terminal. */
	switch (name[0]) {
	case SOCK_STREAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpstctl_vars, nitems(unpstctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_SEQPACKET:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpsqctl_vars, nitems(unpsqctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case SOCK_DGRAM:
		if (namelen != 2)
			return (ENOTDIR);
		return sysctl_bounded_arr(unpdgctl_vars, nitems(unpdgctl_vars),
		    name + 1, namelen - 1, oldp, oldlenp, newp, newlen);
	case NET_UNIX_INFLIGHT:
		valp = &unp_rights;
		/* FALLTHROUGH */
	case NET_UNIX_DEFERRED:
		if (namelen != 1)
			return (ENOTDIR);
		return sysctl_rdint(oldp, oldlenp, newp, *valp);
	default:
		return (ENOPROTOOPT);
	}
}

/*
 * Tear down a unpcb: unlink it from the global list, break the vnode
 * association, disconnect from the peer and all datagram senders, and
 * free the pcb.  Schedules the GC task if descriptors are in flight.
 * Called with unp_lock write-locked.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = NULL;

	rw_assert_wrlock(&unp_lock);

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/*
		 * `v_socket' is only read in unp_connect and
		 * unplock prevents concurrent access.
		 */

		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
	}

	if (unp->unp_conn)
		unp_disconnect(unp);
	/* drop all datagram peers still referring to us */
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);

	if (vp != NULL) {
		/*
		 * Enforce `i_lock' -> `unplock' because fifo subsystem
		 * requires it.  The socket can't be closed concurrently
		 * because the file descriptor reference is
		 * still held.
		 */

		sounlock(so, SL_LOCKED);
		KERNEL_LOCK();
		vrele(vp);
		KERNEL_UNLOCK();
		solock(so);
	}
}

/*
 * bind(2) on a UNIX-domain socket: create the filesystem vnode named by
 * `nam' (via namei/VOP_CREATE) and associate it with the socket.  The
 * socket lock is dropped around the filesystem work; UNP_BINDING guards
 * against concurrent bind/connect during that window.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	/* keep a private, zero-padded copy of the address for unp_addr */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* path already exists: binding over it is refused */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* remember binder's credentials for LOCAL_PEERCRED-style queries */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

/*
 * connect(2) on a UNIX-domain socket: look up the path in `nam', check
 * that it is a writable VSOCK vnode with a listening socket, and splice
 * the two sockets (spawning a server-side socket via sonewconn() for
 * connection-oriented types).
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.
 The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* connection-oriented: peer must be listening */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* pass connecting process's credentials to the server side */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* connect to the freshly spawned server socket instead */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	return (error);
}

/*
 * Splice sockets `so' and `so2' together.  For datagram sockets only
 * `so' records the peer (many-to-one); for stream/seqpacket both ends
 * point at each other.  Called with unp_lock write-locked.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): detach `unp' from its peer, clearing the send
 * buffer mirror counters on both stream/seqpacket endpoints.
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

/*
 * shutdown(2) write side: mark the peer of a connection-oriented socket
 * as unable to receive more data.  No-op for datagram sockets.
 */
void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

/*
 * Abort the connection with error `errno'.  For sockets still sitting
 * on an accept queue (so_head != NULL) the pcb is freed here as well.
 * Called with unp_lock write-locked.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq() releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		m_freem(unp->unp_addr);
		pool_put(&unpcb_pool, unp);
	}
}

#ifdef notdef
unp_drain(void)
{

}
#endif

/*
 * Map a struct file back to its unpcb, or NULL if `fp' is not an
 * AF_UNIX socket.
 */
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

/*
 * Convert an SCM_RIGHTS control message from in-kernel struct fdpass
 * entries into file descriptor numbers in the receiving process, and
 * drop the in-flight accounting.  On failure all passed files are
 * discarded.  Called with unp_lock write-locked.
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	rw_assert_wrlock(&unp_lock);

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets.  Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* recipient's buffer must have room for nfds ints */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(fdp);
	if (error != 0) {
		/* on any failure, close all files still being passed */
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				/* grow the table and retry from the top */
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(fdp);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly..  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	fdpunlock(fdp);
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));
	return (error);
}

/*
 * Convert an SCM_RIGHTS control message from file descriptor numbers
 * into in-kernel struct fdpass entries (taking a reference on each
 * file and bumping the in-flight counters) so it can sit in a socket
 * buffer.  Called with unp_lock write-locked.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	rw_assert_wrlock(&unp_lock);

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* global cap on descriptors in flight */
	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk backwards: struct fdpass is larger than int, so the
	 * in-place conversion must not overwrite unread fd numbers.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did.
 */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		FRELE(fp, p);
		unp_rights--;
	}

	return (error);
}

/*
 * Garbage collector for file descriptors in flight: closes deferred
 * file sets, then runs a mark-and-sweep over all AF_UNIX sockets to
 * find descriptor cycles (sockets whose only references come from
 * being passed) and discards their in-flight contents.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_lock);

	/* only one GC pass at a time */
	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			/* closef() expects a refcount of 2 */
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			/* closef() may sleep; drop the lock around it */
			rw_exit_write(&unp_lock);
			(void) closef(fp, NULL);
			rw_enter_write(&unp_lock);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	/* mark phase: iterate until no more sockets get deferred */
	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_lock);
}

/*
 * Release the files carried in an unreceived control-message chain by
 * moving them to the deferred-close list.
 */
void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Walk a socket buffer mbuf chain and apply `op' to the fdpass array of
 * every SCM_RIGHTS control mbuf found.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * unp_scan() callback for the GC mark phase: defer every AF_UNIX socket
 * referenced by the scanned fdpass array, clearing any tentative DEAD
 * flag.  Called with unp_lock write-locked.
 */
void
unp_mark(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;

		unp = fptounp(rp[i].fp);
		if (unp == NULL)
			continue;

		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_flags |= UNP_GCDEFER;
		unp->unp_flags &= ~UNP_GCDEAD;
	}
}

/*
 * Move an fdpass array onto the deferred-close list (zeroing the
 * originals so they are not found twice) and kick the GC task, which
 * does the actual closef() calls.  Called with unp_lock write-locked.
 */
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	rw_assert_wrlock(&unp_lock);

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);

	task_add(systqmp, &unp_gc_task);
}

/*
 * Validate the sockaddr carried in mbuf `nam' and return it as a
 * sockaddr_un, ensuring sun_path is NUL-terminated (extending the mbuf
 * by one byte if needed).  Optionally returns the path length.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* no terminator present: append one in the mbuf */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}