1 /* $OpenBSD: uipc_usrreq.c,v 1.34 2007/01/16 17:52:18 thib Exp $ */ 2 /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/proc.h> 38 #include <sys/filedesc.h> 39 #include <sys/domain.h> 40 #include <sys/protosw.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/unpcb.h> 44 #include <sys/un.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/file.h> 48 #include <sys/stat.h> 49 #include <sys/mbuf.h> 50 51 /* 52 * Unix communications domain. 53 * 54 * TODO: 55 * SEQPACKET, RDM 56 * rethink name space problems 57 * need a proper out-of-band 58 */ 59 struct sockaddr sun_noname = { sizeof(sun_noname), AF_UNIX }; 60 ino_t unp_ino; /* prototype for fake inode numbers */ 61 62 /*ARGSUSED*/ 63 int 64 uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, 65 struct mbuf *control) 66 { 67 struct unpcb *unp = sotounpcb(so); 68 struct socket *so2; 69 int error = 0; 70 struct proc *p = curproc; /* XXX */ 71 72 if (req == PRU_CONTROL) 73 return (EOPNOTSUPP); 74 if (req != PRU_SEND && control && control->m_len) { 75 error = EOPNOTSUPP; 76 goto release; 77 } 78 if (unp == NULL && req != PRU_ATTACH) { 79 error = EINVAL; 80 goto release; 81 } 82 switch (req) { 83 84 case PRU_ATTACH: 85 if (unp) { 86 error = EISCONN; 87 break; 88 } 89 error = unp_attach(so); 90 break; 91 92 case PRU_DETACH: 93 unp_detach(unp); 94 break; 95 96 case PRU_BIND: 97 error = unp_bind(unp, nam, p); 98 break; 99 100 case PRU_LISTEN: 101 if (unp->unp_vnode == NULL) 102 error = EINVAL; 103 break; 104 105 case PRU_CONNECT: 106 error = unp_connect(so, nam, p); 107 break; 108 109 case PRU_CONNECT2: 110 error = unp_connect2(so, (struct socket *)nam); 111 break; 112 113 case PRU_DISCONNECT: 114 unp_disconnect(unp); 115 break; 116 117 case PRU_ACCEPT: 118 /* 119 * Pass back name of connected socket, 120 * if it was bound and we are still connected 121 * (our peer may have closed already!). 122 */ 123 if (unp->unp_conn && unp->unp_conn->unp_addr) { 124 nam->m_len = unp->unp_conn->unp_addr->m_len; 125 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), 126 mtod(nam, caddr_t), (unsigned)nam->m_len); 127 } else { 128 nam->m_len = sizeof(sun_noname); 129 *(mtod(nam, struct sockaddr *)) = sun_noname; 130 } 131 break; 132 133 case PRU_SHUTDOWN: 134 socantsendmore(so); 135 unp_shutdown(unp); 136 break; 137 138 case PRU_RCVD: 139 switch (so->so_type) { 140 141 case SOCK_DGRAM: 142 panic("uipc 1"); 143 /*NOTREACHED*/ 144 145 case SOCK_STREAM: 146 #define rcv (&so->so_rcv) 147 #define snd (&so2->so_snd) 148 if (unp->unp_conn == NULL) 149 break; 150 so2 = unp->unp_conn->unp_socket; 151 /* 152 * Adjust backpressure on sender 153 * and wakeup any waiting to write. 154 */ 155 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; 156 unp->unp_mbcnt = rcv->sb_mbcnt; 157 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; 158 unp->unp_cc = rcv->sb_cc; 159 sowwakeup(so2); 160 #undef snd 161 #undef rcv 162 break; 163 164 default: 165 panic("uipc 2"); 166 } 167 break; 168 169 case PRU_SEND: 170 if (control && (error = unp_internalize(control, p))) 171 break; 172 switch (so->so_type) { 173 174 case SOCK_DGRAM: { 175 struct sockaddr *from; 176 177 if (nam) { 178 if (unp->unp_conn) { 179 error = EISCONN; 180 break; 181 } 182 error = unp_connect(so, nam, p); 183 if (error) 184 break; 185 } else { 186 if (unp->unp_conn == NULL) { 187 error = ENOTCONN; 188 break; 189 } 190 } 191 so2 = unp->unp_conn->unp_socket; 192 if (unp->unp_addr) 193 from = mtod(unp->unp_addr, struct sockaddr *); 194 else 195 from = &sun_noname; 196 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 197 sorwakeup(so2); 198 m = NULL; 199 control = NULL; 200 } else 201 error = ENOBUFS; 202 if (nam) 203 unp_disconnect(unp); 204 break; 205 } 206 207 case SOCK_STREAM: 208 #define rcv (&so2->so_rcv) 209 #define snd (&so->so_snd) 210 if (so->so_state & SS_CANTSENDMORE) { 211 error = EPIPE; 212 break; 213 } 214 if (unp->unp_conn == NULL) { 215 error = ENOTCONN; 216 break; 217 } 218 so2 = unp->unp_conn->unp_socket; 219 /* 220 * Send to paired receive port, and then reduce 221 * send buffer hiwater marks to maintain backpressure. 222 * Wake up readers. 223 */ 224 if (control) { 225 if (sbappendcontrol(rcv, m, control)) 226 control = NULL; 227 } else 228 sbappend(rcv, m); 229 snd->sb_mbmax -= 230 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; 231 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; 232 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; 233 unp->unp_conn->unp_cc = rcv->sb_cc; 234 sorwakeup(so2); 235 m = NULL; 236 #undef snd 237 #undef rcv 238 break; 239 240 default: 241 panic("uipc 4"); 242 } 243 /* we need to undo unp_internalize in case of errors */ 244 if (control && error) 245 unp_dispose(control); 246 break; 247 248 case PRU_ABORT: 249 unp_drop(unp, ECONNABORTED); 250 break; 251 252 case PRU_SENSE: 253 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 254 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 255 so2 = unp->unp_conn->unp_socket; 256 ((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc; 257 } 258 ((struct stat *) m)->st_dev = NODEV; 259 if (unp->unp_ino == 0) 260 unp->unp_ino = unp_ino++; 261 ((struct stat *) m)->st_atimespec = 262 ((struct stat *) m)->st_mtimespec = 263 ((struct stat *) m)->st_ctimespec = unp->unp_ctime; 264 ((struct stat *) m)->st_ino = unp->unp_ino; 265 return (0); 266 267 case PRU_RCVOOB: 268 return (EOPNOTSUPP); 269 270 case PRU_SENDOOB: 271 error = EOPNOTSUPP; 272 break; 273 274 case PRU_SOCKADDR: 275 if (unp->unp_addr) { 276 nam->m_len = unp->unp_addr->m_len; 277 bcopy(mtod(unp->unp_addr, caddr_t), 278 mtod(nam, caddr_t), (unsigned)nam->m_len); 279 } else 280 nam->m_len = 0; 281 break; 282 283 case PRU_PEERADDR: 284 if (unp->unp_conn && unp->unp_conn->unp_addr) { 285 nam->m_len = unp->unp_conn->unp_addr->m_len; 286 bcopy(mtod(unp->unp_conn->unp_addr, caddr_t), 287 mtod(nam, caddr_t), (unsigned)nam->m_len); 288 } else 289 nam->m_len = 0; 290 break; 291 292 case PRU_PEEREID: 293 if (unp->unp_flags & UNP_FEIDS) { 294 nam->m_len = sizeof(struct unpcbid); 295 bcopy((caddr_t)(&(unp->unp_connid)), 296 mtod(nam, caddr_t), (unsigned)nam->m_len); 297 } else 298 nam->m_len = 0; 299 break; 300 301 case PRU_SLOWTIMO: 302 break; 303 304 default: 305 panic("piusrreq"); 306 } 307 release: 308 if (control) 309 m_freem(control); 310 if (m) 311 m_freem(m); 312 return (error); 313 } 314 315 /* 316 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 317 * for stream sockets, although the total for sender and receiver is 318 * actually only PIPSIZ. 319 * Datagram sockets really use the sendspace as the maximum datagram size, 320 * and don't really want to reserve the sendspace. Their recvspace should 321 * be large enough for at least one max-size datagram plus address. 322 */ 323 #define PIPSIZ 4096 324 u_long unpst_sendspace = PIPSIZ; 325 u_long unpst_recvspace = PIPSIZ; 326 u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 327 u_long unpdg_recvspace = 4*1024; 328 329 int unp_rights; /* file descriptors in flight */ 330 331 int 332 unp_attach(struct socket *so) 333 { 334 struct unpcb *unp; 335 int error; 336 337 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 338 switch (so->so_type) { 339 340 case SOCK_STREAM: 341 error = soreserve(so, unpst_sendspace, unpst_recvspace); 342 break; 343 344 case SOCK_DGRAM: 345 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 346 break; 347 348 default: 349 panic("unp_attach"); 350 } 351 if (error) 352 return (error); 353 } 354 unp = malloc(sizeof(*unp), M_PCB, M_NOWAIT); 355 if (unp == NULL) 356 return (ENOBUFS); 357 bzero((caddr_t)unp, sizeof(*unp)); 358 unp->unp_socket = so; 359 so->so_pcb = unp; 360 nanotime(&unp->unp_ctime); 361 return (0); 362 } 363 364 void 365 unp_detach(struct unpcb *unp) 366 { 367 368 if (unp->unp_vnode) { 369 unp->unp_vnode->v_socket = NULL; 370 vrele(unp->unp_vnode); 371 unp->unp_vnode = NULL; 372 } 373 if (unp->unp_conn) 374 unp_disconnect(unp); 375 while (unp->unp_refs) 376 unp_drop(unp->unp_refs, ECONNRESET); 377 soisdisconnected(unp->unp_socket); 378 unp->unp_socket->so_pcb = NULL; 379 m_freem(unp->unp_addr); 380 if (unp_rights) { 381 /* 382 * Normally the receive buffer is flushed later, 383 * in sofree, but if our receive buffer holds references 384 * to descriptors that are now garbage, we will dispose 385 * of those descriptor references after the garbage collector 386 * gets them (resulting in a "panic: closef: count < 0"). 387 */ 388 sorflush(unp->unp_socket); 389 free(unp, M_PCB); 390 unp_gc(); 391 } else 392 free(unp, M_PCB); 393 } 394 395 int 396 unp_bind(struct unpcb *unp, struct mbuf *nam, struct proc *p) 397 { 398 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 399 struct vnode *vp; 400 struct vattr vattr; 401 int error, namelen; 402 struct nameidata nd; 403 char buf[MLEN]; 404 405 if (unp->unp_vnode != NULL) 406 return (EINVAL); 407 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 408 if (namelen <= 0 || namelen >= MLEN) 409 return EINVAL; 410 strncpy(buf, soun->sun_path, namelen); 411 buf[namelen] = 0; /* null-terminate the string */ 412 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p); 413 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 414 if ((error = namei(&nd)) != 0) 415 return (error); 416 vp = nd.ni_vp; 417 if (vp != NULL) { 418 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 419 if (nd.ni_dvp == vp) 420 vrele(nd.ni_dvp); 421 else 422 vput(nd.ni_dvp); 423 vrele(vp); 424 return (EADDRINUSE); 425 } 426 VATTR_NULL(&vattr); 427 vattr.va_type = VSOCK; 428 vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; 429 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 430 if (error) 431 return (error); 432 vp = nd.ni_vp; 433 vp->v_socket = unp->unp_socket; 434 unp->unp_vnode = vp; 435 unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); 436 unp->unp_connid.unp_euid = p->p_ucred->cr_uid; 437 unp->unp_connid.unp_egid = p->p_ucred->cr_gid; 438 unp->unp_flags |= UNP_FEIDSBIND; 439 VOP_UNLOCK(vp, 0, p); 440 return (0); 441 } 442 443 int 444 unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) 445 { 446 struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *); 447 struct vnode *vp; 448 struct socket *so2, *so3; 449 struct unpcb *unp, *unp2, *unp3; 450 int error; 451 struct nameidata nd; 452 453 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p); 454 if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) { /* XXX */ 455 if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0) 456 return (EMSGSIZE); 457 } else 458 *(mtod(nam, caddr_t) + nam->m_len) = 0; 459 if ((error = namei(&nd)) != 0) 460 return (error); 461 vp = nd.ni_vp; 462 if (vp->v_type != VSOCK) { 463 error = ENOTSOCK; 464 goto bad; 465 } 466 if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) 467 goto bad; 468 so2 = vp->v_socket; 469 if (so2 == NULL) { 470 error = ECONNREFUSED; 471 goto bad; 472 } 473 if (so->so_type != so2->so_type) { 474 error = EPROTOTYPE; 475 goto bad; 476 } 477 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 478 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 479 (so3 = sonewconn(so2, 0)) == 0) { 480 error = ECONNREFUSED; 481 goto bad; 482 } 483 unp = sotounpcb(so); 484 unp2 = sotounpcb(so2); 485 unp3 = sotounpcb(so3); 486 if (unp2->unp_addr) 487 unp3->unp_addr = 488 m_copy(unp2->unp_addr, 0, (int)M_COPYALL); 489 unp3->unp_connid.unp_euid = p->p_ucred->cr_uid; 490 unp3->unp_connid.unp_egid = p->p_ucred->cr_gid; 491 unp3->unp_flags |= UNP_FEIDS; 492 so2 = so3; 493 if (unp2->unp_flags & UNP_FEIDSBIND) { 494 unp->unp_connid.unp_euid = unp2->unp_connid.unp_euid; 495 unp->unp_connid.unp_egid = unp2->unp_connid.unp_egid; 496 unp->unp_flags |= UNP_FEIDS; 497 } 498 } 499 error = unp_connect2(so, so2); 500 bad: 501 vput(vp); 502 return (error); 503 } 504 505 int 506 unp_connect2(struct socket *so, struct socket *so2) 507 { 508 struct unpcb *unp = sotounpcb(so); 509 struct unpcb *unp2; 510 511 if (so2->so_type != so->so_type) 512 return (EPROTOTYPE); 513 unp2 = sotounpcb(so2); 514 unp->unp_conn = unp2; 515 switch (so->so_type) { 516 517 case SOCK_DGRAM: 518 unp->unp_nextref = unp2->unp_refs; 519 unp2->unp_refs = unp; 520 soisconnected(so); 521 break; 522 523 case SOCK_STREAM: 524 unp2->unp_conn = unp; 525 soisconnected(so); 526 soisconnected(so2); 527 break; 528 529 default: 530 panic("unp_connect2"); 531 } 532 return (0); 533 } 534 535 void 536 unp_disconnect(struct unpcb *unp) 537 { 538 struct unpcb *unp2 = unp->unp_conn; 539 540 if (unp2 == NULL) 541 return; 542 unp->unp_conn = NULL; 543 switch (unp->unp_socket->so_type) { 544 545 case SOCK_DGRAM: 546 if (unp2->unp_refs == unp) 547 unp2->unp_refs = unp->unp_nextref; 548 else { 549 unp2 = unp2->unp_refs; 550 for (;;) { 551 if (unp2 == NULL) 552 panic("unp_disconnect"); 553 if (unp2->unp_nextref == unp) 554 break; 555 unp2 = unp2->unp_nextref; 556 } 557 unp2->unp_nextref = unp->unp_nextref; 558 } 559 unp->unp_nextref = NULL; 560 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 561 break; 562 563 case SOCK_STREAM: 564 soisdisconnected(unp->unp_socket); 565 unp2->unp_conn = NULL; 566 soisdisconnected(unp2->unp_socket); 567 break; 568 } 569 } 570 571 #ifdef notdef 572 unp_abort(struct unpcb *unp) 573 { 574 unp_detach(unp); 575 } 576 #endif 577 578 void 579 unp_shutdown(struct unpcb *unp) 580 { 581 struct socket *so; 582 583 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 584 (so = unp->unp_conn->unp_socket)) 585 socantrcvmore(so); 586 } 587 588 void 589 unp_drop(struct unpcb *unp, int errno) 590 { 591 struct socket *so = unp->unp_socket; 592 593 so->so_error = errno; 594 unp_disconnect(unp); 595 if (so->so_head) { 596 so->so_pcb = NULL; 597 sofree(so); 598 m_freem(unp->unp_addr); 599 free(unp, M_PCB); 600 } 601 } 602 603 #ifdef notdef 604 unp_drain(void) 605 { 606 607 } 608 #endif 609 610 int 611 unp_externalize(struct mbuf *rights) 612 { 613 struct proc *p = curproc; /* XXX */ 614 struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 615 int i, *fdp; 616 struct file **rp; 617 struct file *fp; 618 int nfds, error = 0; 619 620 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / 621 sizeof(struct file *); 622 rp = (struct file **)CMSG_DATA(cm); 623 624 fdp = malloc(nfds * sizeof(int), M_TEMP, M_WAITOK); 625 626 #ifdef notyet 627 /* Make sure the recipient should be able to see the descriptors.. */ 628 if (p->p_cwdi->cwdi_rdir != NULL) { 629 rp = (struct file **)CMSG_DATA(cm); 630 for (i = 0; i < nfds; i++) { 631 fp = *rp++; 632 /* 633 * If we are in a chroot'ed directory, and 634 * someone wants to pass us a directory, make 635 * sure it's inside the subtree we're allowed 636 * to access. 637 */ 638 if (fp->f_type == DTYPE_VNODE) { 639 struct vnode *vp = (struct vnode *)fp->f_data; 640 if ((vp->v_type == VDIR) && 641 !vn_isunder(vp, p->p_cwdi->cwdi_rdir, p)) { 642 error = EPERM; 643 break; 644 } 645 } 646 } 647 } 648 #endif 649 650 restart: 651 fdplock(p->p_fd); 652 if (error != 0) { 653 rp = ((struct file **)CMSG_DATA(cm)); 654 for (i = 0; i < nfds; i++) { 655 fp = *rp; 656 /* 657 * zero the pointer before calling unp_discard, 658 * since it may end up in unp_gc().. 659 */ 660 *rp++ = NULL; 661 unp_discard(fp); 662 } 663 goto out; 664 } 665 666 /* 667 * First loop -- allocate file descriptor table slots for the 668 * new descriptors. 669 */ 670 rp = ((struct file **)CMSG_DATA(cm)); 671 for (i = 0; i < nfds; i++) { 672 bcopy(rp, &fp, sizeof(fp)); 673 rp++; 674 if ((error = fdalloc(p, 0, &fdp[i])) != 0) { 675 /* 676 * Back out what we've done so far. 677 */ 678 for (--i; i >= 0; i--) 679 fdremove(p->p_fd, fdp[i]); 680 681 if (error == ENOSPC) { 682 fdexpand(p); 683 error = 0; 684 } else { 685 /* 686 * This is the error that has historically 687 * been returned, and some callers may 688 * expect it. 689 */ 690 error = EMSGSIZE; 691 } 692 fdpunlock(p->p_fd); 693 goto restart; 694 } 695 696 /* 697 * Make the slot reference the descriptor so that 698 * fdalloc() works properly.. We finalize it all 699 * in the loop below. 700 */ 701 p->p_fd->fd_ofiles[fdp[i]] = fp; 702 } 703 704 /* 705 * Now that adding them has succeeded, update all of the 706 * descriptor passing state. 707 */ 708 rp = (struct file **)CMSG_DATA(cm); 709 for (i = 0; i < nfds; i++) { 710 fp = *rp++; 711 fp->f_msgcount--; 712 unp_rights--; 713 } 714 715 /* 716 * Copy temporary array to message and adjust length, in case of 717 * transition from large struct file pointers to ints. 718 */ 719 memcpy(CMSG_DATA(cm), fdp, nfds * sizeof(int)); 720 cm->cmsg_len = CMSG_LEN(nfds * sizeof(int)); 721 rights->m_len = CMSG_SPACE(nfds * sizeof(int)); 722 out: 723 fdpunlock(p->p_fd); 724 free(fdp, M_TEMP); 725 return (error); 726 } 727 728 int 729 unp_internalize(struct mbuf *control, struct proc *p) 730 { 731 struct filedesc *fdp = p->p_fd; 732 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 733 struct file **rp, *fp; 734 int i, error; 735 int nfds, *ip, fd, neededspace; 736 737 if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET || 738 cm->cmsg_len != control->m_len) 739 return (EINVAL); 740 nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int); 741 742 /* Make sure we have room for the struct file pointers */ 743 morespace: 744 neededspace = CMSG_SPACE(nfds * sizeof(struct file *)) - 745 control->m_len; 746 if (neededspace > M_TRAILINGSPACE(control)) { 747 /* if we already have a cluster, the message is just too big */ 748 if (control->m_flags & M_EXT) 749 return (E2BIG); 750 751 /* allocate a cluster and try again */ 752 MCLGET(control, M_WAIT); 753 if ((control->m_flags & M_EXT) == 0) 754 return (ENOBUFS); /* allocation failed */ 755 756 /* copy the data to the cluster */ 757 memcpy(mtod(control, char *), cm, cm->cmsg_len); 758 cm = mtod(control, struct cmsghdr *); 759 goto morespace; 760 } 761 762 /* adjust message & mbuf to note amount of space actually used. */ 763 cm->cmsg_len = CMSG_LEN(nfds * sizeof(struct file *)); 764 control->m_len = CMSG_SPACE(nfds * sizeof(struct file *)); 765 766 ip = ((int *)CMSG_DATA(cm)) + nfds - 1; 767 rp = ((struct file **)CMSG_DATA(cm)) + nfds - 1; 768 for (i = 0; i < nfds; i++) { 769 bcopy(ip, &fd, sizeof fd); 770 ip--; 771 if ((fp = fd_getfile(fdp, fd)) == NULL) { 772 error = EBADF; 773 goto fail; 774 } 775 if (fp->f_count == LONG_MAX-2 || 776 fp->f_msgcount == LONG_MAX-2) { 777 error = EDEADLK; 778 goto fail; 779 } 780 bcopy(&fp, rp, sizeof fp); 781 rp--; 782 fp->f_count++; 783 fp->f_msgcount++; 784 unp_rights++; 785 } 786 return (0); 787 fail: 788 /* Back out what we just did. */ 789 for ( ; i > 0; i--) { 790 bcopy(rp, &fp, sizeof(fp)); 791 rp++; 792 fp->f_count--; 793 fp->f_msgcount--; 794 unp_rights--; 795 } 796 797 return (error); 798 } 799 800 int unp_defer, unp_gcing; 801 extern struct domain unixdomain; 802 803 void 804 unp_gc(void) 805 { 806 struct file *fp, *nextfp; 807 struct socket *so; 808 struct file **extra_ref, **fpp; 809 int nunref, i; 810 811 if (unp_gcing) 812 return; 813 unp_gcing = 1; 814 unp_defer = 0; 815 LIST_FOREACH(fp, &filehead, f_list) 816 fp->f_flag &= ~(FMARK|FDEFER); 817 do { 818 LIST_FOREACH(fp, &filehead, f_list) { 819 if (fp->f_flag & FDEFER) { 820 fp->f_flag &= ~FDEFER; 821 unp_defer--; 822 } else { 823 if (fp->f_count == 0) 824 continue; 825 if (fp->f_flag & FMARK) 826 continue; 827 if (fp->f_count == fp->f_msgcount) 828 continue; 829 } 830 fp->f_flag |= FMARK; 831 832 if (fp->f_type != DTYPE_SOCKET || 833 (so = (struct socket *)fp->f_data) == NULL) 834 continue; 835 if (so->so_proto->pr_domain != &unixdomain || 836 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 837 continue; 838 #ifdef notdef 839 if (so->so_rcv.sb_flags & SB_LOCK) { 840 /* 841 * This is problematical; it's not clear 842 * we need to wait for the sockbuf to be 843 * unlocked (on a uniprocessor, at least), 844 * and it's also not clear what to do 845 * if sbwait returns an error due to receipt 846 * of a signal. If sbwait does return 847 * an error, we'll go into an infinite 848 * loop. Delete all of this for now. 849 */ 850 (void) sbwait(&so->so_rcv); 851 goto restart; 852 } 853 #endif 854 unp_scan(so->so_rcv.sb_mb, unp_mark, 0); 855 } 856 } while (unp_defer); 857 /* 858 * We grab an extra reference to each of the file table entries 859 * that are not otherwise accessible and then free the rights 860 * that are stored in messages on them. 861 * 862 * The bug in the original code is a little tricky, so I'll describe 863 * what's wrong with it here. 864 * 865 * It is incorrect to simply unp_discard each entry for f_msgcount 866 * times -- consider the case of sockets A and B that contain 867 * references to each other. On a last close of some other socket, 868 * we trigger a gc since the number of outstanding rights (unp_rights) 869 * is non-zero. If during the sweep phase the gc code un_discards, 870 * we end up doing a (full) closef on the descriptor. A closef on A 871 * results in the following chain. Closef calls soo_close, which 872 * calls soclose. Soclose calls first (through the switch 873 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 874 * returns because the previous instance had set unp_gcing, and 875 * we return all the way back to soclose, which marks the socket 876 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 877 * to free up the rights that are queued in messages on the socket A, 878 * i.e., the reference on B. The sorflush calls via the dom_dispose 879 * switch unp_dispose, which unp_scans with unp_discard. This second 880 * instance of unp_discard just calls closef on B. 881 * 882 * Well, a similar chain occurs on B, resulting in a sorflush on B, 883 * which results in another closef on A. Unfortunately, A is already 884 * being closed, and the descriptor has already been marked with 885 * SS_NOFDREF, and soclose panics at this point. 886 * 887 * Here, we first take an extra reference to each inaccessible 888 * descriptor. Then, we call sorflush ourself, since we know 889 * it is a Unix domain socket anyhow. After we destroy all the 890 * rights carried in messages, we do a last closef to get rid 891 * of our extra reference. This is the last close, and the 892 * unp_detach etc will shut down the socket. 893 * 894 * 91/09/19, bsy@cs.cmu.edu 895 */ 896 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 897 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 898 fp != NULL; fp = nextfp) { 899 nextfp = LIST_NEXT(fp, f_list); 900 if (fp->f_count == 0) 901 continue; 902 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 903 *fpp++ = fp; 904 nunref++; 905 FREF(fp); 906 fp->f_count++; 907 } 908 } 909 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 910 if ((*fpp)->f_type == DTYPE_SOCKET && (*fpp)->f_data != NULL) 911 sorflush((struct socket *)(*fpp)->f_data); 912 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 913 (void) closef(*fpp, NULL); 914 free((caddr_t)extra_ref, M_FILE); 915 unp_gcing = 0; 916 } 917 918 void 919 unp_dispose(struct mbuf *m) 920 { 921 922 if (m) 923 unp_scan(m, unp_discard, 1); 924 } 925 926 void 927 unp_scan(struct mbuf *m0, void (*op)(struct file *), int discard) 928 { 929 struct mbuf *m; 930 struct file **rp, *fp; 931 struct cmsghdr *cm; 932 int i; 933 int qfds; 934 935 while (m0) { 936 for (m = m0; m; m = m->m_next) { 937 if (m->m_type == MT_CONTROL && 938 m->m_len >= sizeof(*cm)) { 939 cm = mtod(m, struct cmsghdr *); 940 if (cm->cmsg_level != SOL_SOCKET || 941 cm->cmsg_type != SCM_RIGHTS) 942 continue; 943 qfds = (cm->cmsg_len - CMSG_ALIGN(sizeof *cm)) 944 / sizeof(struct file *); 945 rp = (struct file **)CMSG_DATA(cm); 946 for (i = 0; i < qfds; i++) { 947 fp = *rp; 948 if (discard) 949 *rp = 0; 950 (*op)(fp); 951 rp++; 952 } 953 break; /* XXX, but saves time */ 954 } 955 } 956 m0 = m0->m_nextpkt; 957 } 958 } 959 960 void 961 unp_mark(struct file *fp) 962 { 963 if (fp == NULL) 964 return; 965 966 if (fp->f_flag & FMARK) 967 return; 968 969 if (fp->f_flag & FDEFER) 970 return; 971 972 if (fp->f_type == DTYPE_SOCKET) { 973 unp_defer++; 974 fp->f_flag |= FDEFER; 975 } else { 976 fp->f_flag |= FMARK; 977 } 978 } 979 980 void 981 unp_discard(struct file *fp) 982 { 983 984 if (fp == NULL) 985 return; 986 FREF(fp); 987 fp->f_msgcount--; 988 unp_rights--; 989 (void) closef(fp, NULL); 990 } 991