1 /* $NetBSD: nfs_clntsocket.c,v 1.5 2016/06/17 14:28:29 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1991, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 35 */ 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.5 2016/06/17 14:28:29 christos Exp $"); 43 44 #ifdef _KERNEL_OPT 45 #include "opt_nfs.h" 46 #include "opt_mbuftrace.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/evcnt.h> 52 #include <sys/callout.h> 53 #include <sys/proc.h> 54 #include <sys/mount.h> 55 #include <sys/kernel.h> 56 #include <sys/kmem.h> 57 #include <sys/mbuf.h> 58 #include <sys/vnode.h> 59 #include <sys/domain.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/syslog.h> 64 #include <sys/tprintf.h> 65 #include <sys/namei.h> 66 #include <sys/signal.h> 67 #include <sys/signalvar.h> 68 #include <sys/kauth.h> 69 70 #include <netinet/in.h> 71 #include <netinet/tcp.h> 72 73 #include <nfs/rpcv2.h> 74 #include <nfs/nfsproto.h> 75 #include <nfs/nfs.h> 76 #include <nfs/xdr_subs.h> 77 #include <nfs/nfsm_subs.h> 78 #include <nfs/nfsmount.h> 79 #include <nfs/nfsnode.h> 80 #include <nfs/nfsrtt.h> 81 #include <nfs/nfs_var.h> 82 83 static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 84 static void nfs_sndunlock(struct nfsmount *); 85 86 /* 87 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 88 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 89 * Mark and consolidate the data into a new mbuf list. 90 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 91 * small mbufs. 92 * For SOCK_STREAM we must be very careful to read an entire record once 93 * we have read any of it, even if the system call has been interrupted. 
 */
/*
 * nfs_receive:
 *	rep   - request whose reply we are waiting for (supplies the mount
 *		point, flags and retransmit bookkeeping)
 *	aname - on return, mbuf holding the sender's address; set only for
 *		unconnected datagram sockets, otherwise left NULL
 *	mp    - on return, the received reply mbuf chain (NULL on error)
 *	l     - calling lwp, passed through to nfs_send()
 */
static int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
    struct lwp *l)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep->r_nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(rep->r_nmp);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep->r_nmp);
				return (error);
			}
			goto tryagain;
		}
		/* Retransmit anything flagged by the timer before receiving. */
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(rep->r_nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(rep->r_nmp);
		if (sotype == SOCK_STREAM) {
			/*
			 * First read the 4-byte RPC record mark so we know
			 * how many bytes make up the next record.
			 */
			aio.iov_base = (void *) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			UIO_SETUP_SYSSPACE(&auio);
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died after it
					 * received our request, set EPIPE so that
					 * we'll reconnect and retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/* Strip the last-fragment bit of the record mark. */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/* Now pull in the record body itself. */
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * Connection-oriented but record-oriented (non-stream)
			 * socket: no record mark to parse.
			 *
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000;	/* Anything Big */
			/* no need to set up uio_vmspace */
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/*
		 * On a hard receive error, drop any partial reply and try to
		 * reconnect; only EINTR/ERESTART abort without reconnecting.
		 */
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep->r_nmp, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(rep->r_nmp);
		}
	} else {
		/* SOCK_DGRAM: soreceive() does all the work. */
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		/* no need to set up uio_vmspace */
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    NULL, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error, s;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		/* EALREADY: someone else already received our reply. */
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket.  Keep nm_waiters
		 * bumped so a concurrent disconnect can wait for us.
		 */

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					    __func__, error);
#endif
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
			/* nfsm_dissect() above jumps here on a short header */
nfsmout:
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		s = splsoftnet();
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep != NULL || rxid != rep->r_xid)
				continue;

			/* Found it.. */
			rep->r_mrep = mrep;
			rep->r_md = md;
			rep->r_dpos = dpos;
			/* Optional RPC round-trip-time logging. */
			if (nfsrtton) {
				struct rttl *rt;
				int proct = nfs_proct[rep->r_procnum];

				rt = &nfsrtt.rttl[nfsrtt.pos];
				rt->proc = rep->r_procnum;
				rt->rto = NFS_RTO(nmp, proct);
				rt->sent = nmp->nm_sent;
				rt->cwnd = nmp->nm_cwnd;
				rt->srtt = nmp->nm_srtt[proct - 1];
				rt->sdrtt = nmp->nm_sdrtt[proct - 1];
				rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
				getmicrotime(&rt->tstamp);
				if (rep->r_flags & R_TIMING)
					rt->rtt = rep->r_rtt;
				else
					rt->rtt = 1000000;
				nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
			}
			/*
			 * Update congestion window.
			 * Do the additive increase of
			 * one rpc/rtt.
			 */
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    (NFS_CWNDSCALE * NFS_CWNDSCALE +
				    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent -= NFS_CWNDSCALE;
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (rep->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so coarse, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				t1 = rep->r_rtt + 1;
				t1 -= (NFS_SRTT(rep) >> 3);
				NFS_SRTT(rep) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(rep) >> 2);
				NFS_SDRTT(rep) += t1;
			}
			nmp->nm_timeouts = 0;
			break;
		}
		splx(s);
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 * np      - nfs node the request is for
 * mrest   - request body mbuf chain (always consumed)
 * procnum - RPC procedure number
 * mrp/mdp/dposp - on success (and on NFSv3 error returns flagged with
 *	     NFSERR_RETERR), the reply mbuf chain, current mbuf and parse
 *	     position are handed back to the caller
 * rexmitp - if non-NULL, set non-zero when the request was retransmitted
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum,
    struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp,
    char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;		/* alternate cred for EACCES retry */
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL;	/* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	acred = kauth_cred_alloc();

tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	/* Total up the length of the request body. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * on the most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * ie. once a file is successfully open'ed,
		 * following i/o operations never fail with EACCES.
		 * we try to follow the semantics as far as possible.
		 *
		 * note that we expect that the nfs server always grant
		 * accesses by the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			/* Caller already matches the owner: no retry needed. */
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			/* First try with the file owner's credentials. */
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * backup mbuf chain if we may need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				/* Retry once with a fresh Kerberos ticket. */
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Map the NFS wire error to a local errno. */
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				/*
				 * Retry the whole request with the other
				 * credential (owner vs. opener).
				 */
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				/* Server asked us to back off and retry. */
				m_freem(mrep);
				error = 0;
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			/*
			 * NFSv3 error replies still carry post-op attributes,
			 * so hand the reply back flagged with NFSERR_RETERR.
			 */
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	/* Common exit: release cred, request mbuf, request struct, backup. */
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
/*
 * nfs_sndlock:
 *	Acquire the per-mount send lock (NFSMNT_SNDLOCK in nm_iflag).
 *	rep may be NULL; when non-NULL it supplies the lwp for signal
 *	checks, and an interruptible mount (NFSMNT_INT) makes the first
 *	wait signal-catching.  Returns 0 on success, EINTR if interrupted,
 *	or EIO when a soft mount times out waiting for the lock.
 */
static int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct lwp *l;
	int timeo = 0;		/* 0 = wait forever */
	bool catch_p = false;
	int error = 0;

	/* Soft mounts bound the wait so they can fail with EIO. */
	if (nmp->nm_flag & NFSMNT_SOFT)
		timeo = nmp->nm_retry * nmp->nm_timeo;

	/* A forced dismount in progress polls once a second. */
	if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
		timeo = hz;

	if (rep) {
		l = rep->r_lwp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			catch_p = true;
	} else
		l = NULL;
	mutex_enter(&nmp->nm_lock);
	while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
			error = EINTR;
			goto quit;
		}
		if (catch_p) {
			error = cv_timedwait_sig(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		} else {
			error = cv_timedwait(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		}

		if (error) {
			/* Timed out on a soft mount: give up with EIO. */
			if ((error == EWOULDBLOCK) &&
			    (nmp->nm_flag & NFSMNT_SOFT)) {
				error = EIO;
				goto quit;
			}
			error = 0;
		}
		/*
		 * After one signal-catching wait, fall back to
		 * uninterruptible 2-second polls.
		 */
		if (catch_p) {
			catch_p = false;
			timeo = 2 * hz;
		}
	}
	nmp->nm_iflag |= NFSMNT_SNDLOCK;
quit:
	mutex_exit(&nmp->nm_lock);
	return error;
}

/*
 * Unlock the stream socket for others.
 */
static void
nfs_sndunlock(struct nfsmount *nmp)
{

	mutex_enter(&nmp->nm_lock);
	if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
	cv_signal(&nmp->nm_sndcv);
	mutex_exit(&nmp->nm_lock);
}