1 /* $NetBSD: nfs_clntsocket.c,v 1.2 2014/09/05 05:34:57 matt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1991, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 35 */ 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.2 2014/09/05 05:34:57 matt Exp $"); 43 44 #ifdef _KERNEL_OPT 45 #include "opt_nfs.h" 46 #include "opt_mbuftrace.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/evcnt.h> 52 #include <sys/callout.h> 53 #include <sys/proc.h> 54 #include <sys/mount.h> 55 #include <sys/kernel.h> 56 #include <sys/kmem.h> 57 #include <sys/mbuf.h> 58 #include <sys/vnode.h> 59 #include <sys/domain.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/syslog.h> 64 #include <sys/tprintf.h> 65 #include <sys/namei.h> 66 #include <sys/signal.h> 67 #include <sys/signalvar.h> 68 #include <sys/kauth.h> 69 70 #include <netinet/in.h> 71 #include <netinet/tcp.h> 72 73 #include <nfs/rpcv2.h> 74 #include <nfs/nfsproto.h> 75 #include <nfs/nfs.h> 76 #include <nfs/xdr_subs.h> 77 #include <nfs/nfsm_subs.h> 78 #include <nfs/nfsmount.h> 79 #include <nfs/nfsnode.h> 80 #include <nfs/nfsrtt.h> 81 #include <nfs/nfs_var.h> 82 83 static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 84 static void nfs_sndunlock(struct nfsmount *); 85 86 /* 87 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 88 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 89 * Mark and consolidate the data into a new mbuf list. 90 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 91 * small mbufs. 92 * For SOCK_STREAM we must be very careful to read an entire record once 93 * we have read any of it, even if the system call has been interrupted. 
 */
static int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
    struct lwp *l)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive().
	 * On return, *mp holds the received RPC record (or NULL on error)
	 * and *aname holds the sender's address for unconnected datagram
	 * sockets only.
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep->r_nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		/* Reply already arrived or request was soft-terminated. */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(rep->r_nmp);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep->r_nmp);
				return (error);
			}
			goto tryagain;
		}
		/*
		 * Retransmit while the resend flag is set; a copy of the
		 * request mbuf chain is sent so r_mreq stays intact for
		 * further retries.  NOTE(review): R_MUSTRESEND is presumably
		 * cleared inside nfs_send() on success — confirm there.
		 */
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(rep->r_nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep->r_nmp);
		if (sotype == SOCK_STREAM) {
			/*
			 * Read the 4-byte Sun RPC record mark (RFC 1831)
			 * first; it carries the record length in the low
			 * 31 bits.
			 */
			aio.iov_base = (void *) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			UIO_SETUP_SYSSPACE(&auio);
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
				/*
				 * NOTE(review): rep is never NULL here (it
				 * was dereferenced above); the check looks
				 * vestigial.
				 */
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died after it
					 * received our request, set EPIPE so that
					 * we'll reconnect and retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/*
			 * Strip the "last fragment" bit.  NOTE(review): this
			 * assumes the reply arrives as a single fragment —
			 * multi-fragment records are not reassembled here.
			 */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/* Now read exactly len bytes of record body. */
			auio.uio_resid = len;
			do {
			    rcvflg = MSG_WAITALL;
			    error = (*so->so_receive)(so, NULL,
				&auio, mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			/* not need to setup uio_vmspace */
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			/* A connection-oriented record should end with EOR. */
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/*
		 * On a hard receive error, drop what we have and try a
		 * reconnect; EINTR/ERESTART are passed straight up.
		 */
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep->r_nmp, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(rep->r_nmp);
		}
	} else {
		/*
		 * SOCK_DGRAM: a single soreceive() yields a whole datagram.
		 * No socket means the mount is effectively dead.
		 */
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		/* Only capture the peer address on unconnected sockets. */
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		/* not need to setup uio_vmspace */
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    NULL, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		/* EALREADY: someone else already received our reply. */
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket.
		 * The waiter count lets unmount/disconnect know someone is
		 * blocked in nfs_receive() (see nm_disconcv signal below).
		 */

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols? ?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					       __func__, error);
#endif
				continue;
			}
			return (error);
		}
		/* Sender address is not needed for replies; discard it. */
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
			/*
			 * nfsm_dissect() above jumps here on a truncated or
			 * malformed reply: drop it and keep listening.
			 */
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				/* Optionally log an RTT sample. */
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, nfs_proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[nfs_proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[nfs_proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
					getmicrotime(&rt->tstamp);
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					   (NFS_CWNDSCALE * NFS_CWNDSCALE +
					   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * On success (return 0), *mrp/*mdp/*dposp point into the reply for the
 * caller to parse; the caller owns and must free *mrp.  If rexmitp is
 * non-NULL it reports whether the request was retransmitted.
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum, struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp, char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL; /* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	/* Scratch credential, used for the owner-credential retry below. */
	acred = kauth_cred_alloc();

	/* Restart point when retrying the RPC with a different credential. */
tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	/* Total the request payload length over the mbuf chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * on the most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * ie. once a file is successfully open'ed,
		 * following i/o operations never fail with EACCES.
		 * we try to follow the semantics as far as possible.
		 *
		 * note that we expect that the nfs server always grant
		 * accesses by the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			/* Caller already is the owner: no retry needed. */
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			/* First attempt: impersonate the file's owner. */
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * backup mbuf chain if we can need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
			 (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
	/* Restart point for a full retransmit (e.g. NFSERR_TRYLATER). */
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			/* Send a copy; r_mreq is kept for retransmits. */
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			/* One retry with fresh Kerberos auth, then give up. */
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Map the NFS wire status to a local errno. */
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				/*
				 * Owner credential was rejected (or vice
				 * versa): retry once with the other
				 * credential, using the backed-up request.
				 */
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				/* Back off exponentially, then retransmit. */
				m_freem(mrep);
				error = 0;
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			/*
			 * For V3 hand the reply back anyway (marked with
			 * NFSERR_RETERR) so the caller can parse post-op
			 * attributes; for V2 just free it.
			 */
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
	/* Common exit: release scratch cred, request record and backup. */
nfsmout:
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
static int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct lwp *l;
	int timeo = 0;
	bool catch_p = false;
	int error = 0;

	/* Interruptible wait only for interruptible (NFSMNT_INT) mounts. */
	if (rep) {
		l = rep->r_lwp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			catch_p = true;
	} else
		l = NULL;
	mutex_enter(&nmp->nm_lock);
	while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
			error = EINTR;
			goto quit;
		}
		/*
		 * First wait is untimed (timeo == 0); if it is interrupted
		 * we fall back to non-interruptible 2*hz waits below and
		 * rely on the nfs_sigintr() check at the top of the loop.
		 */
		if (catch_p) {
			cv_timedwait_sig(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
		} else {
			cv_timedwait(&nmp->nm_sndcv, &nmp->nm_lock, timeo);
		}
		if (catch_p) {
			catch_p = false;
			timeo = 2 * hz;
		}
	}
	nmp->nm_iflag |= NFSMNT_SNDLOCK;
quit:
	mutex_exit(&nmp->nm_lock);
	return error;
}

/*
 * Unlock the stream socket for others.
 * Must only be called by the current holder of NFSMNT_SNDLOCK.
 */
static void
nfs_sndunlock(struct nfsmount *nmp)
{

	mutex_enter(&nmp->nm_lock);
	if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
	cv_signal(&nmp->nm_sndcv);
	mutex_exit(&nmp->nm_lock);
}