1 /* $NetBSD: nfs_clntsocket.c,v 1.6 2018/01/21 20:36:49 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1991, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 35 */ 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.6 2018/01/21 20:36:49 christos Exp $"); 43 44 #ifdef _KERNEL_OPT 45 #include "opt_nfs.h" 46 #include "opt_mbuftrace.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/evcnt.h> 52 #include <sys/callout.h> 53 #include <sys/proc.h> 54 #include <sys/mount.h> 55 #include <sys/kernel.h> 56 #include <sys/kmem.h> 57 #include <sys/mbuf.h> 58 #include <sys/vnode.h> 59 #include <sys/domain.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/syslog.h> 64 #include <sys/tprintf.h> 65 #include <sys/namei.h> 66 #include <sys/signal.h> 67 #include <sys/signalvar.h> 68 #include <sys/kauth.h> 69 70 #include <netinet/in.h> 71 #include <netinet/tcp.h> 72 73 #include <nfs/rpcv2.h> 74 #include <nfs/nfsproto.h> 75 #include <nfs/nfs.h> 76 #include <nfs/xdr_subs.h> 77 #include <nfs/nfsm_subs.h> 78 #include <nfs/nfsmount.h> 79 #include <nfs/nfsnode.h> 80 #include <nfs/nfsrtt.h> 81 #include <nfs/nfs_var.h> 82 83 static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 84 static void nfs_sndunlock(struct nfsmount *); 85 86 /* 87 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 88 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 89 * Mark and consolidate the data into a new mbuf list. 90 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 91 * small mbufs. 92 * For SOCK_STREAM we must be very careful to read an entire record once 93 * we have read any of it, even if the system call has been interrupted. 
 */
/*
 * nfs_receive:
 *	Pull one RPC message for request 'rep' off the mount's socket.
 *
 *	rep    - outstanding request we are receiving on behalf of
 *	aname  - out: sender address mbuf for unconnected datagram sockets,
 *		 otherwise left NULL (caller frees)
 *	mp     - out: received RPC message mbuf chain (caller frees);
 *		 always NULL on error return
 *	l      - lwp used for signal/interrupt checks (nfs_sigintr, nfs_send)
 *
 *	Returns 0 or an errno.  For stream sockets this may transparently
 *	reconnect and resend (goto tryagain) before succeeding.
 */
static int
nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp,
    struct lwp *l)
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = NULL;
	*aname = NULL;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(rep->r_nmp, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(rep->r_nmp);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(rep->r_nmp);
				return (error);
			}
			goto tryagain;
		}
		/* Retransmit anything marked R_MUSTRESEND before we listen. */
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(rep->r_nmp);
					return (error);
				}
				goto tryagain;
			}
		}
		/* Send lock no longer needed; receive side is covered by
		 * the caller's nfs_rcvlock(). */
		nfs_sndunlock(rep->r_nmp);
		if (sotype == SOCK_STREAM) {
			/* First read the 4-byte Sun RPC record mark. */
			aio.iov_base = (void *) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			UIO_SETUP_SYSSPACE(&auio);
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL, &auio,
				    NULL, NULL, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died after it
					 * received our request, set EPIPE so that
					 * we'll reconnect and retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			/*
			 * Strip the record mark's high (last-fragment) bit;
			 * the low 31 bits are the fragment length.
			 */
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			/* Now read the record body itself into *mp. */
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, NULL, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			/* not need to setup uio_vmspace */
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, NULL,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
				/*
				 * NOTE(review): 'control' is tested below after
				 * m_freem() freed it; the (stale, non-NULL)
				 * pointer value is what keeps us looping after
				 * a control-only receive.  Historic behavior —
				 * confirm before changing.
				 */
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		/*
		 * On a hard receive error, toss the partial message and try
		 * to reconnect/resync; EINTR/ERESTART propagate unchanged.
		 */
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = NULL;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(rep->r_nmp, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(rep->r_nmp);
		}
	} else {
		/*
		 * SOCK_DGRAM: soreceive() hands us whole datagrams; only
		 * capture the sender address when the socket is unconnected.
		 */
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = NULL;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		/* not need to setup uio_vmspace */
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    NULL, &rcvflg);
			if (error == EWOULDBLOCK) {
				int intr = nfs_sigintr(rep->r_nmp, rep, l);
				if (intr)
					error = intr;
			}
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = NULL;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error, s;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		/* EALREADY means someone else already matched our reply. */
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */

		/* Count ourselves so a dismount can wait for us to drain. */
		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols? ?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					    __func__, error);
#endif
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			/* nfsm_dissect() jumps here on a truncated reply. */
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		s = splsoftnet();
		mutex_enter(&nfs_reqq_lock);
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep != NULL || rxid != rep->r_xid)
				continue;

			/* Found it.. */
			rep->r_mrep = mrep;
			rep->r_md = md;
			rep->r_dpos = dpos;
			/* Optional RTT logging for nfsstat(8)-style tools. */
			if (nfsrtton) {
				struct rttl *rt;
				int proct = nfs_proct[rep->r_procnum];

				rt = &nfsrtt.rttl[nfsrtt.pos];
				rt->proc = rep->r_procnum;
				rt->rto = NFS_RTO(nmp, proct);
				rt->sent = nmp->nm_sent;
				rt->cwnd = nmp->nm_cwnd;
				rt->srtt = nmp->nm_srtt[proct - 1];
				rt->sdrtt = nmp->nm_sdrtt[proct - 1];
				rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
				getmicrotime(&rt->tstamp);
				if (rep->r_flags & R_TIMING)
					rt->rtt = rep->r_rtt;
				else
					rt->rtt = 1000000;
				nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
			}
			/*
			 * Update congestion window.
			 * Do the additive increase of
			 * one rpc/rtt.
			 */
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				    (NFS_CWNDSCALE * NFS_CWNDSCALE +
				    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent -= NFS_CWNDSCALE;
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (rep->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so course, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				t1 = rep->r_rtt + 1;
				t1 -= (NFS_SRTT(rep) >> 3);
				NFS_SRTT(rep) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(rep) >> 2);
				NFS_SDRTT(rep) += t1;
			}
			nmp->nm_timeouts = 0;
			break;
		}
		mutex_exit(&nfs_reqq_lock);
		splx(s);
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
/*
 * np       - nfsnode the RPC operates on (drives credential selection)
 * mrest    - request body mbuf chain; consumed here in all cases
 * procnum  - NFS procedure number
 * lwp      - calling lwp (NULL for async); cred - caller's credential
 * mrp/mdp/dposp - out: reply mbuf chain and dissect position on success
 * rexmitp  - out (optional): nonzero if the request was retransmitted
 * Returns 0 or an errno; for NFSv3 server errors the errno is OR'ed
 * with NFSERR_RETERR and the reply is still handed back via *mrp.
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum, struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp, char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL; /* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	/* Scratch credential for the file-owner retry; freed at nfsmout. */
	acred = kauth_cred_alloc();

tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	/* Total up the request length over the mbuf chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * on the most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * ie. once a file is successfully open'ed,
		 * following i/o operations never fail with EACCES.
		 * we try to follow the semantics as far as possible.
		 *
		 * note that we expect that the nfs server always grant
		 * accesses by the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			/* Caller already is the owner: nothing to retry. */
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			if (use_opencred)
				break;
			/* First attempt impersonates the file owner. */
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * backup mbuf chain if we can need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
			 (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	mutex_enter(&nfs_reqq_lock);
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	mutex_exit(&nfs_reqq_lock);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			/* Send a copy; r_mreq is kept for retransmits. */
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		/* r_rtt == -1: leave the (re)send to the timer. */
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	mutex_enter(&nfs_reqq_lock);
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	mutex_exit(&nfs_reqq_lock);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				/* One retry with fresh kerb credentials. */
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Map the NFS status code to a local errno. */
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				/*
				 * Owner credential was refused (or vice
				 * versa): flip and retry once with the
				 * other credential.
				 */
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				/* v3 only: back off exponentially and retry. */
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				m_freem(mrep);
				error = 0;
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	/* Common exit: release scratch cred, request record and backups. */
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
/*
 * nfs_sndlock:
 *	Acquire the per-mount send lock (NFSMNT_SNDLOCK in nm_iflag).
 *	'rep' may be NULL; when present it supplies the lwp for
 *	nfs_sigintr() checks and enables interruptible sleeps for
 *	NFSMNT_INT mounts.  Returns 0, EINTR (signalled), or EIO
 *	(soft mount timed out waiting for the lock).
 */
static int
nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep)
{
	struct lwp *l;
	/*
	 * timeo == 0 presumably means "no timeout" to cv_timedwait() —
	 * NOTE(review): confirm against condvar(9).
	 */
	int timeo = 0;
	bool catch_p = false;
	int error = 0;

	if (nmp->nm_flag & NFSMNT_SOFT)
		timeo = nmp->nm_retry * nmp->nm_timeo;

	/* A forced dismount in progress: poll every tick so we notice. */
	if (nmp->nm_iflag & NFSMNT_DISMNTFORCE)
		timeo = hz;

	if (rep) {
		l = rep->r_lwp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			catch_p = true;
	} else
		l = NULL;
	mutex_enter(&nmp->nm_lock);
	while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, l)) {
			error = EINTR;
			goto quit;
		}
		if (catch_p) {
			error = cv_timedwait_sig(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		} else {
			error = cv_timedwait(&nmp->nm_sndcv,
			    &nmp->nm_lock, timeo);
		}

		if (error) {
			/* Soft mount that ran out its timeout: give up. */
			if ((error == EWOULDBLOCK) &&
			    (nmp->nm_flag & NFSMNT_SOFT)) {
				error = EIO;
				goto quit;
			}
			/* Otherwise retry the wait loop. */
			error = 0;
		}
		/*
		 * After one interruptible sleep, fall back to short
		 * uninterruptible naps; the nfs_sigintr() check at the
		 * top of the loop still notices signals.
		 */
		if (catch_p) {
			catch_p = false;
			timeo = 2 * hz;
		}
	}
	nmp->nm_iflag |= NFSMNT_SNDLOCK;
quit:
	mutex_exit(&nmp->nm_lock);
	return error;
}

/*
 * Unlock the stream socket for others.
 */
static void
nfs_sndunlock(struct nfsmount *nmp)
{

	mutex_enter(&nmp->nm_lock);
	/* Releasing a lock we do not hold is a kernel bug. */
	if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	nmp->nm_iflag &= ~NFSMNT_SNDLOCK;
	cv_signal(&nmp->nm_sndcv);
	mutex_exit(&nmp->nm_lock);
}