/*	$OpenBSD: uipc_usrreq.c,v 1.66 2012/04/26 17:18:17 matthew Exp $	*/
/*	$NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/unpcb.h>
#include <sys/un.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/mbuf.h>

/*
 * Unix communications domain.
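 *
 * Sockets in this domain are named by paths in the file system (see
 * unp_bind()/unp_connect() below); once a pair of sockets is connected,
 * data is appended directly to the peer socket's receive buffer.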
 *
 * TODO:
 *	RDM
 *	rethink name space problems
 *	need a proper out-of-band
 */
struct	sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX };
ino_t	unp_ino;			/* prototype for fake inode numbers */

/*ARGSUSED*/
int
uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;
	int error = 0;

	if (req == PRU_CONTROL)
		return (EOPNOTSUPP);
	if (req != PRU_SEND && control && control->m_len) {
		error = EOPNOTSUPP;
		goto release;
	}
	if (unp == NULL && req != PRU_ATTACH) {
		error = EINVAL;
		goto release;
	}
	switch (req) {

	case PRU_ATTACH:
		if (unp) {
			error = EISCONN;
			break;
		}
		error = unp_attach(so);
		break;

	case PRU_DETACH:
		unp_detach(unp);
		break;

	case PRU_BIND:
		error = unp_bind(unp, nam, p);
		break;

	case PRU_LISTEN:
		if (unp->unp_vnode == NULL)
			error = EINVAL;
		break;

	case PRU_CONNECT:
		error = unp_connect(so, nam, p);
		break;

	case PRU_CONNECT2:
		error = unp_connect2(so, (struct socket *)nam);
		break;

	case PRU_DISCONNECT:
		unp_disconnect(unp);
		break;

	case PRU_ACCEPT:
		/*
		 * Pass back name of connected socket,
		 * if it was bound and we are still connected
		 * (our peer may have closed already!).
		 */
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), nam->m_len);
		} else {
			nam->m_len = sizeof(sun_noname);
			*(mtod(nam, struct sockaddr *)) = sun_noname;
		}
		break;

	case PRU_SHUTDOWN:
		socantsendmore(so);
		unp_shutdown(unp);
		break;

	case PRU_RCVD:
		switch (so->so_type) {

		case SOCK_DGRAM:
			panic("uipc 1");
			/*NOTREACHED*/

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so->so_rcv)
#define	snd (&so2->so_snd)
			if (unp->unp_conn == NULL)
				break;
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Adjust backpressure on sender
			 * and wakeup any waiting to write.
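			 * (The peer's send-buffer limits were charged as this
			 * data arrived; now that it has been consumed, give
			 * that space back and let a blocked writer continue.)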
			 */
			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
			unp->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
			unp->unp_cc = rcv->sb_cc;
			sowwakeup(so2);
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 2");
		}
		break;

	case PRU_SEND:
		if (control && (error = unp_internalize(control, p)))
			break;
		switch (so->so_type) {

		case SOCK_DGRAM: {
			struct sockaddr *from;

			if (nam) {
				if (unp->unp_conn) {
					error = EISCONN;
					break;
				}
				error = unp_connect(so, nam, p);
				if (error)
					break;
			} else {
				if (unp->unp_conn == NULL) {
					error = ENOTCONN;
					break;
				}
			}
			so2 = unp->unp_conn->unp_socket;
			if (unp->unp_addr)
				from = mtod(unp->unp_addr, struct sockaddr *);
			else
				from = &sun_noname;
			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
				sorwakeup(so2);
				m = NULL;
				control = NULL;
			} else
				error = ENOBUFS;
			if (nam)
				unp_disconnect(unp);
			break;
		}

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
#define	rcv (&so2->so_rcv)
#define	snd (&so->so_snd)
			if (so->so_state & SS_CANTSENDMORE) {
				error = EPIPE;
				break;
			}
			if (unp->unp_conn == NULL) {
				error = ENOTCONN;
				break;
			}
			so2 = unp->unp_conn->unp_socket;
			/*
			 * Send to paired receive port, and then reduce
			 * send buffer hiwater marks to maintain backpressure.
			 * Wake up readers.
			 */
			if (control) {
				if (sbappendcontrol(rcv, m, control))
					control = NULL;
			} else if (so->so_type == SOCK_SEQPACKET)
				sbappendrecord(rcv, m);
			else
				sbappend(rcv, m);
			snd->sb_mbmax -=
			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
			unp->unp_conn->unp_cc = rcv->sb_cc;
			sorwakeup(so2);
			m = NULL;
#undef snd
#undef rcv
			break;

		default:
			panic("uipc 4");
		}
		/* we need to undo unp_internalize in case of errors */
		if (control && error)
			unp_dispose(control);
		break;

	case PRU_ABORT:
		unp_drop(unp, ECONNABORTED);
		break;

	case PRU_SENSE:
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		switch (so->so_type) {
		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			if (unp->unp_conn != NULL) {
				so2 = unp->unp_conn->unp_socket;
				((struct stat *) m)->st_blksize +=
				    so2->so_rcv.sb_cc;
			}
			break;
		default:
			break;
		}
		((struct stat *) m)->st_dev = NODEV;
		if (unp->unp_ino == 0)
			unp->unp_ino = unp_ino++;
		((struct stat *) m)->st_atim =
		    ((struct stat *) m)->st_mtim =
		    ((struct stat *) m)->st_ctim = unp->unp_ctime;
		((struct stat *) m)->st_ino = unp->unp_ino;
		return (0);

	case PRU_RCVOOB:
		return (EOPNOTSUPP);

	case PRU_SENDOOB:
		error = EOPNOTSUPP;
		break;

	case PRU_SOCKADDR:
		if (unp->unp_addr) {
			nam->m_len = unp->unp_addr->m_len;
			bcopy(mtod(unp->unp_addr, caddr_t),
			    mtod(nam, caddr_t), nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_PEERADDR:
		if (unp->unp_conn && unp->unp_conn->unp_addr) {
			nam->m_len = unp->unp_conn->unp_addr->m_len;
			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
			    mtod(nam, caddr_t), nam->m_len);
		} else
			nam->m_len = 0;
		break;

	case PRU_SLOWTIMO:
		break;

	default:
		panic("piusrreq");
	}
release:
	if (control)
		m_freem(control);
	if (m)
		m_freem(m);
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#define	PIPSIZ	4096
u_long	unpst_sendspace = PIPSIZ;
u_long	unpst_recvspace = PIPSIZ;
u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
u_long	unpdg_recvspace = 4*1024;

int	unp_rights;			/* file descriptors in flight */

int
unp_attach(struct socket *so)
{
	struct unpcb *unp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
		case SOCK_SEQPACKET:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT|M_ZERO);
	if (unp == NULL)
		return (ENOBUFS);
	unp->unp_socket = so;
	so->so_pcb = unp;
	getnanotime(&unp->unp_ctime);
	return (0);
}

void
unp_detach(struct unpcb *unp)
{

	if (unp->unp_vnode) {
		unp->unp_vnode->v_socket = NULL;
		vrele(unp->unp_vnode);
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn)
		unp_disconnect(unp);
	while (unp->unp_refs)
		unp_drop(unp->unp_refs, ECONNRESET);
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	m_freem(unp->unp_addr);
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(unp->unp_socket);
		free(unp, M_PCB);
		unp_gc();
	} else
		free(unp, M_PCB);
}

int
unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct mbuf *nam2;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	struct nameidata nd;
	size_t pathlen;

	if (unp->unp_vnode != NULL)
		return (EINVAL);

	if (soun->sun_len > sizeof(struct sockaddr_un) ||
	    soun->sun_len < offsetof(struct sockaddr_un, sun_path))
		return (EINVAL);
	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	pathlen = strnlen(soun->sun_path, soun->sun_len -
	    offsetof(struct sockaddr_un, sun_path));
	if (pathlen == sizeof(soun->sun_path))
		return (EINVAL);

	nam2 = m_getclr(M_WAITOK, MT_SONAME);
	nam2->m_len = sizeof(struct sockaddr_un);
	memcpy(mtod(nam2, struct sockaddr_un *), soun,
	    offsetof(struct sockaddr_un, sun_path) + pathlen);
	/* No need to NUL terminate: m_getclr() returns bzero'd mbufs. */

	soun = mtod(nam2, struct sockaddr_un *);

	/* Fixup sun_len to keep it in sync with m_len. */
	soun->sun_len = nam2->m_len;

	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
	    soun->sun_path, p);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	if ((error = namei(&nd)) != 0) {
		m_freem(nam2);
		return (error);
	}
	vp = nd.ni_vp;
	if (vp != NULL) {
		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		m_freem(nam2);
		return (EADDRINUSE);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error) {
		m_freem(nam2);
		return (error);
	}
	unp->unp_addr = nam2;
	vp = nd.ni_vp;
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_connid.uid = p->p_ucred->cr_uid;
	unp->unp_connid.gid = p->p_ucred->cr_gid;
	unp->unp_connid.pid = p->p_p->ps_mainproc->p_pid;
	unp->unp_flags |= UNP_FEIDSBIND;
	VOP_UNLOCK(vp, 0, p);
	return (0);
}

int
unp_connect(struct socket *so, struct mbuf *nam, struct proc *p)
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	struct vnode *vp;
	struct socket *so2, *so3;
	struct unpcb *unp, *unp2, *unp3;
	int error;
	struct nameidata nd;

	if (soun->sun_family != AF_UNIX)
		return (EAFNOSUPPORT);

	if (nam->m_len < sizeof(struct sockaddr_un))
		*(mtod(nam, caddr_t) + nam->m_len) = 0;
	else if (nam->m_len > sizeof(struct sockaddr_un))
		return (EINVAL);
	else if (memchr(soun->sun_path, '\0', sizeof(soun->sun_path)) == NULL)
		return (EINVAL);

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
		    (so3 = sonewconn(so2, 0)) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr)
			unp3->unp_addr =
			    m_copy(unp2->unp_addr, 0, (int)M_COPYALL);
		unp3->unp_connid.uid = p->p_ucred->cr_uid;
		unp3->unp_connid.gid = p->p_ucred->cr_gid;
		unp3->unp_connid.pid = p->p_p->ps_mainproc->p_pid;
		unp3->unp_flags |= UNP_FEIDS;
		so2 = so3;
		if (unp2->unp_flags & UNP_FEIDSBIND) {
			unp->unp_connid = unp2->unp_connid;
			unp->unp_flags |= UNP_FEIDS;
		}
	}
	error = unp_connect2(so, so2);
bad:
	vput(vp);
	return (error);
}

int
unp_connect2(struct socket *so, struct socket *so2)
{
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2;

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		unp->unp_nextref = unp2->unp_refs;
		unp2->unp_refs = unp;
		soisconnected(so);
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

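/*
 * Break the connection between a PCB and its peer.  Datagram sockets are
 * unlinked from the peer's reference list; stream and seqpacket sockets
 * have both ends marked as disconnected.
 */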
void
unp_disconnect(struct unpcb *unp)
{
	struct unpcb *unp2 = unp->unp_conn;

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		if (unp2->unp_refs == unp)
			unp2->unp_refs = unp->unp_nextref;
		else {
			unp2 = unp2->unp_refs;
			for (;;) {
				if (unp2 == NULL)
					panic("unp_disconnect");
				if (unp2->unp_nextref == unp)
					break;
				unp2 = unp2->unp_nextref;
			}
			unp2->unp_nextref = unp->unp_nextref;
		}
		unp->unp_nextref = NULL;
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

void
unp_shutdown(struct unpcb *unp)
{
	struct socket *so;

	switch (unp->unp_socket->so_type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (unp->unp_conn && (so = unp->unp_conn->unp_socket))
			socantrcvmore(so);
		break;
	default:
		break;
	}
}

void
unp_drop(struct unpcb *unp, int errno)
{
	struct socket *so = unp->unp_socket;

	so->so_error = errno;
	unp_disconnect(unp);
	if (so->so_head) {
		so->so_pcb = NULL;
		sofree(so);
		m_freem(unp->unp_addr);
		free(unp, M_PCB);
	}
}

#ifdef notdef
unp_drain(void)
{

}
#endif

int
unp_externalize(struct mbuf *rights, socklen_t controllen)
{
	struct proc *p = curproc;		/* XXX */
	struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
	int i, *fdp = NULL;
	struct file **rp;
	struct file *fp;
	int nfds, error = 0;

	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) /
	    sizeof(struct file *);
	if (controllen < CMSG_ALIGN(sizeof(struct cmsghdr)))
		controllen = 0;
	else
		controllen -= CMSG_ALIGN(sizeof(struct cmsghdr));
	if (nfds > controllen / sizeof(int)) {
		error = EMSGSIZE;
		goto restart;
	}

	rp = (struct file **)CMSG_DATA(cm);

	fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK);

	/* Make sure the recipient should be able to see the descriptors.. */
	if (p->p_fd->fd_rdir != NULL) {
		rp = (struct file **)CMSG_DATA(cm);
		for (i = 0; i < nfds; i++) {
			fp = *rp++;
			/*
			 * No to block devices.  If passing a directory,
			 * make sure that it is underneath the root.
			 */
			if (fp->f_type == DTYPE_VNODE) {
				struct vnode *vp = (struct vnode *)fp->f_data;

				if (vp->v_type == VBLK ||
				    (vp->v_type == VDIR &&
				    !vn_isunder(vp, p->p_fd->fd_rdir, p))) {
					error = EPERM;
					break;
				}
			}
		}
	}

restart:
	fdplock(p->p_fd);
	if (error != 0) {
		rp = ((struct file **)CMSG_DATA(cm));
		for (i = 0; i < nfds; i++) {
			fp = *rp;
			/*
			 * zero the pointer before calling unp_discard,
			 * since it may end up in unp_gc()..
			 */
			*rp++ = NULL;
			unp_discard(fp);
		}
		goto out;
	}

	/*
	 * First loop -- allocate file descriptor table slots for the
	 * new descriptors.
	 */
	rp = ((struct file **)CMSG_DATA(cm));
	for (i = 0; i < nfds; i++) {
		bcopy(rp, &fp, sizeof(fp));
		rp++;
		if ((error = fdalloc(p, 0, &fdp[i])) != 0) {
			/*
			 * Back out what we've done so far.
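			 * (Release any slots already reserved; if the table
			 * was simply full it is grown below and the whole
			 * operation is retried from "restart".)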
			 */
			for (--i; i >= 0; i--)
				fdremove(p->p_fd, fdp[i]);

			if (error == ENOSPC) {
				fdexpand(p);
				error = 0;
			} else {
				/*
				 * This is the error that has historically
				 * been returned, and some callers may
				 * expect it.
				 */
				error = EMSGSIZE;
			}
			fdpunlock(p->p_fd);
			goto restart;
		}

		/*
		 * Make the slot reference the descriptor so that
		 * fdalloc() works properly.. We finalize it all
		 * in the loop below.
		 */
		p->p_fd->fd_ofiles[fdp[i]] = fp;
	}

	/*
	 * Now that adding them has succeeded, update all of the
	 * descriptor passing state.
	 */
	rp = (struct file **)CMSG_DATA(cm);
	for (i = 0; i < nfds; i++) {
		fp = *rp++;
		fp->f_msgcount--;
		unp_rights--;
	}

	/*
	 * Copy temporary array to message and adjust length, in case of
	 * transition from large struct file pointers to ints.
	 */
	memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int));
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(int));
	rights->m_len = CMSG_LEN(nfds * sizeof(int));
out:
	fdpunlock(p->p_fd);
	if (fdp)
		free(fdp, M_TEMP);
	return (error);
}

int
unp_internalize(struct mbuf *control, struct proc *p)
{
	struct filedesc *fdp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct file **rp, *fp;
	int i, error;
	int nfds, *ip, fd, neededspace;

	/*
	 * Check for two potential msg_controllen values because
	 * IETF stuck their nose in a place it does not belong.
	 */
	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
	    !(cm->cmsg_len == control->m_len ||
	    control->m_len == CMSG_ALIGN(cm->cmsg_len)))
		return (EINVAL);
	nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);

	if (unp_rights + nfds > maxfiles / 10)
		return (EMFILE);

	/* Make sure we have room for the struct file pointers */
morespace:
	neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) -
	    control->m_len;
	if (neededspace > M_TRAILINGSPACE(control)) {
		char *tmp;
		/* if we already have a cluster, the message is just too big */
		if (control->m_flags & M_EXT)
			return (E2BIG);

		/* copy cmsg data temporarily out of the mbuf */
		tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
		memcpy(tmp, mtod(control, caddr_t), control->m_len);

		/* allocate a cluster and try again */
		MCLGET(control, M_WAIT);
		if ((control->m_flags & M_EXT) == 0) {
			free(tmp, M_TEMP);
			return (ENOBUFS);	/* allocation failed */
		}

		/* copy the data back into the cluster */
		cm = mtod(control, struct cmsghdr *);
		memcpy(cm, tmp, control->m_len);
		free(tmp, M_TEMP);
		goto morespace;
	}

	/* adjust message & mbuf to note amount of space actually used. */
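	/*
	 * The descriptor numbers supplied by the process are converted in
	 * place into struct file pointers below; both arrays are walked
	 * from the tail so that the larger pointers do not overwrite
	 * descriptor numbers that have not been read yet.
	 */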
	cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *));
	control->m_len = CMSG_SPACE(nfds * sizeof(struct file *));

	ip = ((int *)CMSG_DATA(cm)) + nfds - 1;
	rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1;
	for (i = 0; i < nfds; i++) {
		bcopy(ip, &fd, sizeof fd);
		ip--;
		if ((fp = fd_getfile(fdp, fd)) == NULL) {
			error = EBADF;
			goto fail;
		}
		if (fp->f_count == LONG_MAX-2 ||
		    fp->f_msgcount == LONG_MAX-2) {
			error = EDEADLK;
			goto fail;
		}
		/* kq descriptors cannot be copied */
		if (fp->f_type == DTYPE_KQUEUE) {
			error = EINVAL;
			goto fail;
		}
		bcopy(&fp, rp, sizeof fp);
		rp--;
		fp->f_count++;
		fp->f_msgcount++;
		unp_rights++;
	}
	return (0);
fail:
	/* Back out what we just did. */
	for ( ; i > 0; i--) {
		rp++;
		bcopy(rp, &fp, sizeof(fp));
		fp->f_count--;
		fp->f_msgcount--;
		unp_rights--;
	}

	return (error);
}

int	unp_defer, unp_gcing;
extern	struct domain unixdomain;

void
unp_gc(void)
{
	struct file *fp, *nextfp;
	struct socket *so;
	struct file **extra_ref, **fpp;
	int nunref, i;

	if (unp_gcing)
		return;
	unp_gcing = 1;
	unp_defer = 0;
	LIST_FOREACH(fp, &filehead, f_list)
		fp->f_iflags &= ~(FIF_MARK|FIF_DEFER);
	do {
		LIST_FOREACH(fp, &filehead, f_list) {
			if (fp->f_iflags & FIF_DEFER) {
				fp->f_iflags &= ~FIF_DEFER;
				unp_defer--;
			} else {
				if (fp->f_count == 0)
					continue;
				if (fp->f_iflags & FIF_MARK)
					continue;
				if (fp->f_count == fp->f_msgcount)
					continue;
			}
			fp->f_iflags |= FIF_MARK;

			if (fp->f_type != DTYPE_SOCKET ||
			    (so = (struct socket *)fp->f_data) == NULL)
				continue;
			if (so->so_proto->pr_domain != &unixdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
		}
	} while (unp_defer);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the original code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.  Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
	    fp != NULL; fp = nextfp) {
		nextfp = LIST_NEXT(fp, f_list);
		if (fp->f_count == 0)
			continue;
		if (fp->f_count == fp->f_msgcount &&
		    !(fp->f_iflags & FIF_MARK)) {
			*fpp++ = fp;
			nunref++;
			FREF(fp);
			fp->f_count++;
		}
	}
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL)
			sorflush((struct socket *)(*fpp)->f_data);
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		(void) closef(*fpp, NULL);
	free((caddr_t)extra_ref, M_FILE);
	unp_gcing = 0;
}

void
unp_dispose(struct mbuf *m)
{

	if (m)
		unp_scan(m, unp_discard, 1);
}

void
unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard)
{
	struct mbuf *m;
	struct file **rp, *fp;
	struct cmsghdr *cm;
	int i;
	int qfds;

	while (m0) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type == MT_CONTROL &&
			    m->m_len >= sizeof(*cm)) {
				cm = mtod(m, struct cmsghdr *);
				if (cm->cmsg_level != SOL_SOCKET ||
				    cm->cmsg_type != SCM_RIGHTS)
					continue;
				qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm))
				    / sizeof(struct file *);
				rp = (struct file **)CMSG_DATA(cm);
				for (i = 0; i < qfds; i++) {
					fp = *rp;
					if (discard)
						*rp = 0;
					(*op)(fp);
					rp++;
				}
				break;		/* XXX, but saves time */
			}
		}
		m0 = m0->m_nextpkt;
	}
}

void
unp_mark(struct file *fp)
{
	if (fp == NULL)
		return;

	if (fp->f_iflags & (FIF_MARK|FIF_DEFER))
		return;

	if (fp->f_type == DTYPE_SOCKET) {
		unp_defer++;
		fp->f_iflags |= FIF_DEFER;
	} else {
		fp->f_iflags |= FIF_MARK;
	}
}

void
unp_discard(struct file *fp)
{

	if (fp == NULL)
		return;
	FREF(fp);
	fp->f_msgcount--;
	unp_rights--;
	(void) closef(fp, NULL);
}
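
#ifdef notdef
/*
 * Not kernel code: a minimal userland sketch of the SCM_RIGHTS descriptor
 * passing that unp_internalize()/unp_externalize() service, kept here only
 * for illustration (the helper name send_fd() is hypothetical).  The sender
 * hands the kernel a descriptor number; unp_internalize() converts it into
 * an in-flight struct file reference, and unp_externalize() installs a new
 * descriptor in the receiver's table when the message is read.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

static int
send_fd(int sock, int fd)
{
	struct msghdr msg;
	struct iovec iov;
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} cmsgbuf;
	struct cmsghdr *cmsg;
	char c = 0;

	/* at least one byte of real data is required to carry the cmsg */
	memset(&msg, 0, sizeof(msg));
	iov.iov_base = &c;
	iov.iov_len = sizeof(c);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cmsgbuf.buf;
	msg.msg_controllen = sizeof(cmsgbuf.buf);

	/* build the SCM_RIGHTS control message holding the descriptor */
	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return (sendmsg(sock, &msg, 0) == -1 ? -1 : 0);
}
#endif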