1 /* $OpenBSD: uipc_usrreq.c,v 1.144 2021/02/22 19:14:01 mvs Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>
#include <sys/task.h>
#include <sys/pledge.h>
#include <sys/pool.h>
#include <sys/rwlock.h>

/*
 * Locks used to protect global data and struct members:
 *	I	immutable after creation
 *	U	unp_lock
 */

/* Single global lock serializing all UNIX-domain socket state. */
struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");

/*
 * Stack of sets of files that were passed over a socket but were
 * not received and need to be closed.  Entries are pushed by
 * unp_discard() and drained by the unp_gc() task.
 */
struct unp_deferral {
	SLIST_ENTRY(unp_deferral)	ud_link;	/* [U] */
	int				ud_n;		/* [I] */
	/* followed by ud_n struct fdpass */
	struct fdpass			ud_fp[];	/* [I] */
};

void	uipc_setaddr(const struct unpcb *, struct mbuf *);
void	unp_discard(struct fdpass *, int);
void	unp_mark(struct fdpass *, int);
void	unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
int	unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);

/* Allocator for unpcb control blocks; initialized in unp_init(). */
struct pool unpcb_pool;
/* Deferred garbage collection of in-flight descriptors. */
struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);

/*
 * Unix communications domain.
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */

/* Default peer name returned when a socket was never bound. */
const struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };

/* [U] list of all UNIX domain sockets, for unp_gc() */
LIST_HEAD(unp_head, unpcb)	unp_head =
    LIST_HEAD_INITIALIZER(unp_head);
/* [U] list of sets of files that were sent over sockets that are now closed */
SLIST_HEAD(,unp_deferral)	unp_deferred =
    SLIST_HEAD_INITIALIZER(unp_deferred);

ino_t	unp_ino;	/* [U] prototype for fake inode numbers */
int	unp_rights;	/* [U] file descriptors in flight */
int	unp_defer;	/* [U] number of deferred fp to close by the GC task */
int	unp_gcing;	/* [U] GC task currently running */

/*
 * Initialize the UNIX-domain socket subsystem: set up the unpcb
 * allocation pool.  Called once at boot.
 */
void
unp_init(void)
{
	pool_init(&unpcb_pool, sizeof(struct unpcb), 0,
	    IPL_SOFTNET, 0, "unpcb", NULL);
}

/*
 * Copy the bound address of `unp' into the caller-supplied mbuf `nam',
 * or sun_noname if the socket was never bound (or `unp' is NULL,
 * e.g. a peer that already disconnected).
 */
void
uipc_setaddr(const struct unpcb *unp, struct mbuf *nam)
{
	if (unp != NULL && unp->unp_addr != NULL) {
		nam->m_len = unp->unp_addr->m_len;
		memcpy(mtod(nam, caddr_t), mtod(unp->unp_addr, caddr_t),
		    nam->m_len);
	} else {
		nam->m_len = sizeof(sun_noname);
		memcpy(mtod(nam, struct sockaddr *), &sun_noname,
		    nam->m_len);
	}
}

/*
 * Protocol user-request handler for the UNIX domain: dispatches the
 * PRU_* request codes issued by the socket layer.  Called with the
 * socket locked (unp_lock held); on exit, ownership of `m' and
 * `control' passes back to the socket layer except for the request
 * types excluded at the `release' label below.
 */
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	/* Control data is only meaningful on PRU_SEND (SCM_RIGHTS). */
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL) {
		error = EINVAL;
		goto release;
	}

	NET_ASSERT_UNLOCKED();

	switch (req) {

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		/* listen() is only valid on a bound socket. */
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		/* socketpair(2): `nam' is actually the second socket. */
		error = unp_connect2(so, (struct socket *)nam);
		if (!error) {
			/* Stamp both ends with the creator's credentials. */
			unp->unp_connid.uid = p->p_ucred->cr_uid;
			unp->unp_connid.gid = p->p_ucred->cr_gid;
			unp->unp_connid.pid = p->p_p->ps_pid;
			unp->unp_flags |= UNP_FEIDS;
			unp2 = sotounpcb((struct socket *)nam);
			unp2->unp_connid.uid = p->p_ucred->cr_uid;
			unp2->unp_connid.gid = p->p_ucred->cr_gid;
			unp2->unp_connid.pid = p->p_p->ps_pid;
			unp2->unp_flags |= UNP_FEIDS;
		}
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
			 */
			so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt;
			so2->so_snd.sb_cc = so->so_rcv.sb_cc;
			sowwakeup(so2);
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		/* Convert fds in control data to in-kernel file pointers. */
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			const struct sockaddr *from;

			if (nam) {
				/* sendto(): temporary connect allowed only
				 * if not already connected. */
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(so2, &so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				/* Receiver now owns m and control. */
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			/* Undo the temporary sendto() connection. */
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then raise
			 * send buffer counts to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(so2, &so2->so_rcv, m,
				    control)) {
					control = NULL;
				} else {
					error = ENOBUFS;
					break;
				}
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(so2, &so2->so_rcv, m);
			else
				sbappend(so2, &so2->so_rcv, m);
			so->so_snd.sb_mbcnt = so2->so_rcv.sb_mbcnt;
			so->so_snd.sb_cc = so2->so_rcv.sb_cc;
			if (so2->so_rcv.sb_cc > 0)
				sorwakeup(so2);
			m = NULL;
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE: {
		/* fstat(2) on a socket: `m' carries the stat buffer. */
		struct stat *sb = (struct stat *)m;

		sb->st_blksize = so->so_snd.sb_hiwat;
		sb->st_dev = NODEV;
		/* Hand out a fake inode number on first use. */
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		sb->st_atim.tv_sec =
		    sb->st_mtim.tv_sec =
		    sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec;
		sb->st_atim.tv_nsec =
		    sb->st_mtim.tv_nsec =
		    sb->st_ctim.tv_nsec = unp->unp_ctime.tv_nsec;
		sb->st_ino = unp->unp_ino;
		break;
	}

	case PRU_RCVOOB:
	case PRU_SENDOOB:
		/* No out-of-band support in the UNIX domain. */
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		uipc_setaddr(unp, nam);
		break;

	case PRU_PEERADDR:
		uipc_setaddr(unp->unp_conn, nam);
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("uipc_usrreq");
	}
release:
	/*
	 * For these request types `m' points at caller-owned storage
	 * (e.g. the stat buffer for PRU_SENSE), so it must not be freed.
	 */
	if (req != PRU_RCVD && req != PRU_RCVOOB && req != PRU_SENSE) {
		m_freem(control);
		m_freem(m);
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

/*
 * PRU-level attach: reserve socket buffer space (unless the caller
 * already did) and allocate/link a new unpcb for `so'.
 */
int
uipc_attach(struct socket *so, int proto)
{
	struct unpcb *unp;
	int error;

	rw_assert_wrlock(&unp_lock);

	if (so->so_pcb)
		return EISCONN;
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	/* Creation time is reported by fstat(2) via PRU_SENSE. */
	getnanotime(&unp->unp_ctime);
	LIST_INSERT_HEAD(&unp_head, unp, unp_link);
	return (0);
}

/*
 * PRU-level detach: thin wrapper validating the pcb before tearing
 * it down in unp_detach().
 */
int
uipc_detach(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);

	NET_ASSERT_UNLOCKED();

	unp_detach(unp);

	return (0);
}

/*
 * Tear down a unpcb: unlink it from the global list, detach any bound
 * vnode, disconnect from the peer, drop datagram back-references, and
 * free the pcb.  Schedules the GC task if descriptors are in flight.
 */
void
unp_detach(struct unpcb *unp)
{
	struct socket *so = unp->unp_socket;
	struct vnode *vp = NULL;

	rw_assert_wrlock(&unp_lock);

	LIST_REMOVE(unp, unp_link);
	if (unp->unp_vnode) {
		/*
		 * `v_socket' is only read in unp_connect and
		 * unplock prevents concurrent access.
		 */

		unp->unp_vnode->v_socket = NULL;
		vp = unp->unp_vnode;
		unp->unp_vnode = NULL;
	}

	if (unp->unp_conn)
		unp_disconnect(unp);
	while (!SLIST_EMPTY(&unp->unp_refs))
		unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET);
	soisdisconnected(so);
	so->so_pcb = NULL;
	m_freem(unp->unp_addr);
	pool_put(&unpcb_pool, unp);
	if (unp_rights)
		task_add(systqmp, &unp_gc_task);

	if (vp != NULL) {
		/*
		 * Enforce `i_lock' -> `unplock' because fifo subsystem
		 * requires it. The socket can't be closed concurrently
		 * because the file descriptor reference is
		 * still held.
		 */

		sounlock(so, SL_LOCKED);
		KERNEL_LOCK();
		vrele(vp);
		KERNEL_UNLOCK();
		solock(so);
	}
}

/*
 * bind(2): create a VSOCK vnode at the path in `nam' and attach it to
 * the socket.  Temporarily drops the socket lock (taking KERNEL_LOCK
 * for namei) — UNP_BINDING excludes concurrent bind/connect meanwhile.
 */
int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EINVAL);
	if (unp->unp_vnode != NULL)
		return (EINVAL);
	if ((error = unp_nam2sun(nam, &soun, &pathlen)))
		return (error);

	/* Keep a private, fixed-size copy of the address. */
	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns zero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_BINDING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(unp->unp_socket, SL_LOCKED);

	KERNEL_LOCK();
	/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error != 0) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Path already exists: bind must fail. */
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		error = EADDRINUSE;
		solock(unp->unp_socket);
		goto out;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	vput(nd.ni_dvp);
	if (error) {
		m_freem(nam2);
		solock(unp->unp_socket);
		goto out;
	}
	solock(unp->unp_socket);
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	/* Record binder's credentials for LOCAL_PEERCRED-style queries. */
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp);
out:
	KERNEL_UNLOCK();
	unp->unp_flags &= ~UNP_BINDING;

	return (error);
}

/*
 * connect(2): look up the path in `nam', find the listening socket
 * bound to that vnode, and wire the two sockets together (spawning a
 * server-side socket via sonewconn() for connection-oriented types).
 * Drops the socket lock around namei; UNP_CONNECTING excludes
 * concurrent bind/connect on this pcb meanwhile.
 */
int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun;
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	struct nameidata nd;
	int error;

	unp = sotounpcb(so);
	if (unp->unp_flags & (UNP_BINDING | UNP_CONNECTING))
		return (EISCONN);
	if ((error = unp_nam2sun(nam, &soun, NULL)))
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	nd.ni_pledge = PLEDGE_UNIX;

	unp->unp_flags |= UNP_CONNECTING;

	/*
	 * Enforce `i_lock' -> `unplock' because fifo subsystem
	 * requires it. The socket can't be closed concurrently
	 * because the file descriptor reference is still held.
	 */

	sounlock(so, SL_LOCKED);

	KERNEL_LOCK();
	error = namei(&nd);
	if (error != 0)
		goto unlock;
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto put;
	}
	/* Connecting requires write access to the socket node. */
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto put;
	solock(so);
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* Vnode exists but no socket is bound to it anymore. */
		error = ECONNREFUSED;
		goto put_locked;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto put_locked;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* Stream/seqpacket: peer must be listening. */
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto put_locked;
		}
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT);
		/* Give the server side the connecting process's creds. */
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_pid;
		unp3->unp_flags |= UNP_FEIDS;
		/* Connect to the freshly spawned server socket. */
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			/* Client learns the listener's bind-time creds. */
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
put_locked:
	sounlock(so, SL_LOCKED);
put:
	vput(vp);
unlock:
	KERNEL_UNLOCK();
	solock(so);
	unp->unp_flags &= ~UNP_CONNECTING;

	return (error);
}

/*
 * Wire two sockets of the same type together.  Datagram sockets get a
 * one-way link plus a back-reference on the peer's unp_refs list;
 * stream/seqpacket sockets are linked symmetrically and both marked
 * connected.
 */
int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	rw_assert_wrlock(&unp_lock);

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		SLIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_nextref);
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/*
 * Undo unp_connect2(): unlink `unp' from its peer.  For stream and
 * seqpacket sockets both ends are disconnected and their send-buffer
 * accounting reset (it mirrors the peer's receive buffer).
 */
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		SLIST_REMOVE(&unp2->unp_refs, unp, unpcb, unp_nextref);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp->unp_socket->so_snd.sb_mbcnt = 0;
		unp->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		unp2->unp_socket->so_snd.sb_mbcnt = 0;
		unp2->unp_socket->so_snd.sb_cc = 0;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

/*
 * shutdown(2) write side: tell the connected peer (if any) that no
 * more data will arrive.  Datagram sockets have nothing to do.
 */
void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

/*
 * Abort a connection with error `errno'.  If the socket still sits on
 * a listen queue (so_head != NULL), free it and its pcb here since it
 * will never be accepted.
 */
void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	rw_assert_wrlock(&unp_lock);

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		/*
		 * As long as `unp_lock' is taken before entering
		 * uipc_usrreq() releasing it here would lead to a
		 * double unlock.
		 */
		sofree(so, SL_NOUNLOCK);
		m_freem(unp->unp_addr);
		pool_put(&unpcb_pool, unp);
	}
}

#ifdef notdef
unp_drain(void)
{

}
#endif

extern	struct domain unixdomain;

/*
 * Map a file pointer to its unpcb, or NULL if `fp' is not a
 * UNIX-domain socket.
 */
static struct unpcb *
fptounp(struct file *fp)
{
	struct socket *so;

	if (fp->f_type != DTYPE_SOCKET)
		return (NULL);
	if ((so = fp->f_data) == NULL)
		return (NULL);
	if (so->so_proto->pr_domain != &unixdomain)
		return (NULL);
	return (sotounpcb(so));
}

/*
 * Receive side of SCM_RIGHTS: convert the struct fdpass array in
 * `rights' (built by unp_internalize() on the sender) into file
 * descriptors in the receiving process, rewriting the cmsg in place.
 * On any error all passed files are discarded via unp_discard().
 */
int
unp_externalize(struct mbuf *rights, socklen_t controllen, int flags)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	struct filedesc *fdp = p->p_fd;
	int i, *fds = NULL;
	struct fdpass *rp;
	struct file *fp;
	int nfds, error = 0;

	rw_assert_wrlock(&unp_lock);

	/*
	 * This code only works because SCM_RIGHTS is the only supported
	 * control message type on unix sockets. Enforce this here.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET)
		return EINVAL;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct fdpass);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	/* Receiver's buffer too small for nfds ints: discard everything. */
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	/* Make sure the recipient should be able to see the descriptors.. */
	rp = (struct fdpass *)CMSG_DATA(cm);

	/* fdp->fd_rdir requires KERNEL_LOCK() */
	KERNEL_LOCK();

	for (i = 0; i < nfds; i++) {
		fp = rp->fp;
		rp++;
		error = pledge_recvfd(p, fp);
		if (error)
			break;

		/*
		 * No to block devices. If passing a directory,
		 * make sure that it is underneath the root.
		 */
		if (fdp->fd_rdir != NULL && fp->f_type == DTYPE_VNODE) {
			struct vnode *vp = (struct vnode *)fp->f_data;

			if (vp->v_type == VBLK ||
			    (vp->v_type == VDIR &&
			    !vn_isunder(vp, fdp->fd_rdir, p))) {
				error = EPERM;
				break;
			}
		}
	}

	KERNEL_UNLOCK();

	fds = mallocarray(nfds, sizeof(int), M_TEMP, M_WAITOK);

restart:
	fdplock(fdp);
	if (error != 0) {
		if (nfds > 0) {
			rp = ((struct fdpass *)CMSG_DATA(cm));
			unp_discard(rp, nfds);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct fdpass *)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		if ((error = fdalloc(p, 0, &fds[i])) != 0) {
			/*
			 * Back out what we've done so far.
			 */
			for (--i; i >= 0; i--)
				fdremove(fdp, fds[i]);

			if (error == ENOSPC) {
				/* Grow the fd table and retry from scratch. */
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(fdp);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		mtx_enter(&fdp->fd_fplock);
		KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
		fdp->fd_ofiles[fds[i]] = rp->fp;
		mtx_leave(&fdp->fd_fplock);

		fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
		if (flags & MSG_CMSG_CLOEXEC)
			fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;

		rp++;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct fdpass *)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		struct unpcb *unp;

		fp = rp->fp;
		rp++;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fds, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	fdpunlock(fdp);
	if (fds != NULL)
		free(fds, M_TEMP, nfds * sizeof(int));
	return (error);
}

/*
 * Send side of SCM_RIGHTS: convert the file descriptors in `control'
 * into struct fdpass entries (taking a reference on each file and
 * bumping the in-flight accounting), expanding the cmsg in place.
 */
int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct fdpass *rp;
	struct file *fp;
	struct unpcb *unp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	rw_assert_wrlock(&unp_lock);

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (control->m_len < CMSG_LEN(0) || cm->cmsg_len < CMSG_LEN(0))
		return (EINVAL);
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	/* Cap total in-flight descriptors system-wide. */
	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct fdpass)) -
	    control->m_len;
	if (neededspace > m_trailingspace(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP, control->m_len);
			return (ENOBUFS);       /* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP, control->m_len);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct fdpass));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct fdpass));

	/*
	 * Walk backwards: each int fd is overwritten in place by a
	 * larger struct fdpass, so converting from the end avoids
	 * clobbering fds not yet read.
	 */
	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct fdpass *)CMSG_DATA(cm)) + nfds - 1;
	fdplock(fdp);
	for (i = 0; i < nfds; i++) {
		memcpy(&fd, ip, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count >= FDUP_MAX_COUNT) {
			error = EDEADLK;
			goto fail;
		}
		error = pledge_sendfd(p, fp);
		if (error)
			goto fail;

		/* kqueue descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		rp->fp = fp;
		rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
		rp--;
		if ((unp = fptounp(fp)) != NULL) {
			unp->unp_file = fp;
			unp->unp_msgcount++;
		}
		unp_rights++;
	}
	fdpunlock(fdp);
	return (0);
fail:
	fdpunlock(fdp);
	if (fp != NULL)
		FRELE(fp, p);
	/* Back out what we just did.
	 */
	for ( ; i > 0; i--) {
		rp++;
		fp = rp->fp;
		if ((unp = fptounp(fp)) != NULL)
			unp->unp_msgcount--;
		FRELE(fp, p);
		unp_rights--;
	}

	return (error);
}

/*
 * Garbage collector for descriptors passed over UNIX-domain sockets.
 * First closes files on the deferred list, then runs a mark-and-sweep
 * over all unpcbs to find sockets kept alive only by in-flight
 * references (cycles of passed sockets) and discards their contents.
 */
void
unp_gc(void *arg __unused)
{
	struct unp_deferral *defer;
	struct file *fp;
	struct socket *so;
	struct unpcb *unp;
	int nunref, i;

	rw_enter_write(&unp_lock);

	if (unp_gcing)
		goto unlock;
	unp_gcing = 1;

	/* close any fds on the deferred list */
	while ((defer = SLIST_FIRST(&unp_deferred)) != NULL) {
		SLIST_REMOVE_HEAD(&unp_deferred, ud_link);
		for (i = 0; i < defer->ud_n; i++) {
			fp = defer->ud_fp[i].fp;
			if (fp == NULL)
				continue;
			/* closef() expects a refcount of 2 */
			FREF(fp);
			if ((unp = fptounp(fp)) != NULL)
				unp->unp_msgcount--;
			unp_rights--;
			/* closef() may sleep; drop the lock around it. */
			rw_exit_write(&unp_lock);
			(void) closef(fp, NULL);
			rw_enter_write(&unp_lock);
		}
		free(defer, M_TEMP, sizeof(*defer) +
		    sizeof(struct fdpass) * defer->ud_n);
	}

	unp_defer = 0;
	LIST_FOREACH(unp, &unp_head, unp_link)
		unp->unp_flags &= ~(UNP_GCMARK | UNP_GCDEFER | UNP_GCDEAD);
	do {
		nunref = 0;
		LIST_FOREACH(unp, &unp_head, unp_link) {
			fp = unp->unp_file;
			if (unp->unp_flags & UNP_GCDEFER) {
				/*
				 * This socket is referenced by another
				 * socket which is known to be live,
				 * so it's certainly live.
				 */
				unp->unp_flags &= ~UNP_GCDEFER;
				unp_defer--;
			} else if (unp->unp_flags & UNP_GCMARK) {
				/* marked as live in previous pass */
				continue;
			} else if (fp == NULL) {
				/* not being passed, so can't be in loop */
			} else if (fp->f_count == 0) {
				/*
				 * Already being closed, let normal close
				 * path take its course
				 */
			} else {
				/*
				 * Unreferenced by other sockets so far,
				 * so if all the references (f_count) are
				 * from passing (unp_msgcount) then this
				 * socket is prospectively dead
				 */
				if (fp->f_count == unp->unp_msgcount) {
					nunref++;
					unp->unp_flags |= UNP_GCDEAD;
					continue;
				}
			}

			/*
			 * This is the first time we've seen this socket on
			 * the mark pass and known it has a live reference,
			 * so mark it, then scan its receive buffer for
			 * sockets and note them as deferred (== referenced,
			 * but not yet marked).
			 */
			unp->unp_flags |= UNP_GCMARK;

			so = unp->unp_socket;
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);

	/*
	 * If there are any unreferenced sockets, then for each dispose
	 * of files in its receive buffer and then close it.
	 */
	if (nunref) {
		LIST_FOREACH(unp, &unp_head, unp_link) {
			if (unp->unp_flags & UNP_GCDEAD)
				unp_scan(unp->unp_socket->so_rcv.sb_mb,
				    unp_discard);
		}
	}
	unp_gcing = 0;
unlock:
	rw_exit_write(&unp_lock);
}

/*
 * Discard any SCM_RIGHTS file references held in mbuf chain `m'
 * (e.g. control data that could not be delivered).
 */
void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard);
}

/*
 * Walk an mbuf record chain looking for SCM_RIGHTS control messages
 * and apply `op' (unp_mark or unp_discard) to each fdpass array found.
 */
void
unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
{
	struct mbuf *m;
	struct fdpass *rp;
	struct cmsghdr *cm;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct fdpass);
				if (qfds > 0) {
					rp = (struct fdpass *)CMSG_DATA(cm);
					op(rp, qfds);
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

/*
 * GC mark phase callback: flag each passed UNIX socket as referenced
 * by a live socket (UNP_GCDEFER) so the next mark pass visits it.
 */
void
unp_mark(struct fdpass *rp, int nfds)
{
	struct unpcb *unp;
	int i;

	rw_assert_wrlock(&unp_lock);

	for (i = 0; i < nfds; i++) {
		if (rp[i].fp == NULL)
			continue;

		unp = fptounp(rp[i].fp);
		if (unp == NULL)
			continue;

		if (unp->unp_flags & (UNP_GCMARK|UNP_GCDEFER))
			continue;

		unp_defer++;
		unp->unp_flags |= UNP_GCDEFER;
		unp->unp_flags &= ~UNP_GCDEAD;
	}
}

/*
 * Give up on delivering a set of passed files: move them onto the
 * deferred-close list (zeroing the originals so they are not seen
 * twice) and kick the GC task to close them.
 */
void
unp_discard(struct fdpass *rp, int nfds)
{
	struct unp_deferral *defer;

	rw_assert_wrlock(&unp_lock);

	/* copy the file pointers to a deferral structure */
	defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
	defer->ud_n = nfds;
	memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
	memset(rp, 0, sizeof(*rp) * nfds);
	SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);

	task_add(systqmp, &unp_gc_task);
}

/*
 * Validate and extract a sockaddr_un from the name mbuf `nam'.
 * On success *sun points into the mbuf at a sockaddr_un whose
 * sun_path is NUL-terminated (a terminator is appended in place if
 * there is trailing space); *pathlen (if not NULL) receives the
 * path length excluding the NUL.  Returns 0 or an errno.
 */
int
unp_nam2sun(struct mbuf *nam, struct sockaddr_un **sun, size_t *pathlen)
{
	struct sockaddr *sa = mtod(nam, struct sockaddr *);
	size_t size, len;

	if (nam->m_len < offsetof(struct sockaddr, sa_data))
		return EINVAL;
	if (sa->sa_family != AF_UNIX)
		return EAFNOSUPPORT;
	if (sa->sa_len != nam->m_len)
		return EINVAL;
	if (sa->sa_len > sizeof(struct sockaddr_un))
		return EINVAL;
	*sun = (struct sockaddr_un *)sa;

	/* ensure that sun_path is NUL terminated and fits */
	size = (*sun)->sun_len - offsetof(struct sockaddr_un, sun_path);
	len = strnlen((*sun)->sun_path, size);
	if (len == sizeof((*sun)->sun_path))
		return EINVAL;
	if (len == size) {
		/* No NUL within the supplied bytes: append one. */
		if (m_trailingspace(nam) == 0)
			return EINVAL;
		nam->m_len++;
		(*sun)->sun_len++;
		(*sun)->sun_path[len] = '\0';
	}
	if (pathlen != NULL)
		*pathlen = len;

	return 0;
}