/*	$OpenBSD: uipc_usrreq.c,v 1.145 2021/04/29 20:13:25 mvs Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	U	unp_lock
 */
struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [U] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

struct pool unpcb_pool;
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
const struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [U] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [U] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [U] file descriptors in flight */
int	unp_defer;	/* [U] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [U] GC task currently running */

void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wake up anyone waiting to write.
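			 * The sender's so_snd counters shadow this
			 * socket's so_rcv counters, so copying the
			 * drained receive counts back over frees up
			 * send space that blocked writers can use.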
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
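 * (sosend() refuses to queue an atomic datagram larger than the send
 * high-water mark, so unpdg_sendspace is the effective size cap.)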
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}

int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	unp_detach(unp);

	return (0);
}

void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = NULL;

	rw_assert_wrlock(&unp_lock);

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/*
		 * `v_socket' is only read in unp_connect and
		 * unplock prevents concurrent access.
		 */

		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
	}

	if (unp->unp_conn)
		unp_disconnect(unp);
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);

	if (vp != NULL) {
		/*
		 * Enforce `i_lock' -> `unplock' because fifo subsystem
		 * requires it.  The socket can't be closed concurrently
		 * because the file descriptor reference is
		 * still held.
		 */

		sounlock(so, SL_LOCKED);
		KERNEL_LOCK();
		vrele(vp);
		KERNEL_UNLOCK();
		solock(so);
	}
}

int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
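	 * The UNP_BINDING flag set above keeps other threads from
	 * binding or connecting this unpcb while the lock is dropped.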
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it.  The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
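	 * As in unp_bind(), the UNP_CONNECTING flag set above keeps
	 * the unpcb from being bound or connected again while the
	 * socket lock is dropped around namei().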
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq(), releasing it here would lead to a
		 * double unlock.
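		 * Instead, sofree() is told to skip the unlock with
		 * SL_NOUNLOCK and the caller releases `unp_lock'.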
		 */
		sofree(so, SL_NOUNLOCK);
		m_freem(unp->unp_addr);
		pool_put(&unpcb_pool, unp);
	}
}

#ifdef notdef
void
unp_drain(void)
{

}
#endif

extern	struct domain unixdomain;

static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	rw_assert_wrlock(&unp_lock);

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets.  Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient will be able to see the descriptors. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices.  If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(fdp);
	if (error != 0) {
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(fdp);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.  We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
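	 * Each externalized file loses one in-flight reference:
	 * unp_msgcount on its unpcb (if it is a unix socket) and
	 * the global unp_rights counter.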
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	fdpunlock(fdp);
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));
	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	rw_assert_wrlock(&unp_lock);

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);	/* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/*
	 * Back out what we just did.
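	 * Walk back up the fdpass array, dropping the reference
	 * taken by fd_getfile() and the in-flight accounting for
	 * every descriptor internalized so far.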
	 */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		FRELE(fp, p);
		unp_rights--;
	}

	return (error);
}

void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_lock);

	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			/* closef() expects a refcount of 2 */
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			rw_exit_write(&unp_lock);
			(void) closef(fp, NULL);
			rw_enter_write(&unp_lock);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then dispose of the
	 * files in each one's receive buffer and then close it.
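	 * unp_discard() moves their fdpass entries onto the deferred
	 * list and reschedules this task, so the actual closes happen
	 * on a later pass instead of recursing here.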
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_lock);
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_mark(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;

		unp = fptounp(rp[i].fp);
		if (unp == NULL)
			continue;

		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_flags |= UNP_GCDEFER;
		unp->unp_flags &= ~UNP_GCDEAD;
	}
}

void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	rw_assert_wrlock(&unp_lock);

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);

	task_add(systqmp, &unp_gc_task);
}

int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}