1 /* $NetBSD: nfs_clntsocket.c,v 1.7 2024/07/05 04:31:54 rin Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1991, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 35 */ 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: nfs_clntsocket.c,v 1.7 2024/07/05 04:31:54 rin Exp $"); 43 44 #ifdef _KERNEL_OPT 45 #include "opt_nfs.h" 46 #include "opt_mbuftrace.h" 47 #endif 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/evcnt.h> 52 #include <sys/callout.h> 53 #include <sys/proc.h> 54 #include <sys/mount.h> 55 #include <sys/kernel.h> 56 #include <sys/kmem.h> 57 #include <sys/mbuf.h> 58 #include <sys/vnode.h> 59 #include <sys/domain.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/syslog.h> 64 #include <sys/tprintf.h> 65 #include <sys/namei.h> 66 #include <sys/signal.h> 67 #include <sys/signalvar.h> 68 #include <sys/kauth.h> 69 70 #include <netinet/in.h> 71 #include <netinet/tcp.h> 72 73 #include <nfs/rpcv2.h> 74 #include <nfs/nfsproto.h> 75 #include <nfs/nfs.h> 76 #include <nfs/xdr_subs.h> 77 #include <nfs/nfsm_subs.h> 78 #include <nfs/nfsmount.h> 79 #include <nfs/nfsnode.h> 80 #include <nfs/nfsrtt.h> 81 #include <nfs/nfs_var.h> 82 83 static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 84 static void nfs_sndunlock(struct nfsmount *); 85 86 /* 87 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 88 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 89 * Mark and consolidate the data into a new mbuf list. 90 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 91 * small mbufs. 92 * For SOCK_STREAM we must be very careful to read an entire record once 93 * we have read any of it, even if the system call has been interrupted. 
94 */ 95 static int 96 nfs_receive(struct nfsreq *rep, struct mbuf **aname, struct mbuf **mp, 97 struct lwp *l) 98 { 99 struct socket *so; 100 struct uio auio; 101 struct iovec aio; 102 struct mbuf *m; 103 struct mbuf *control; 104 u_int32_t len; 105 struct mbuf **getnam; 106 int error, sotype, rcvflg; 107 108 /* 109 * Set up arguments for soreceive() 110 */ 111 *mp = NULL; 112 *aname = NULL; 113 sotype = rep->r_nmp->nm_sotype; 114 115 /* 116 * For reliable protocols, lock against other senders/receivers 117 * in case a reconnect is necessary. 118 * For SOCK_STREAM, first get the Record Mark to find out how much 119 * more there is to get. 120 * We must lock the socket against other receivers 121 * until we have an entire rpc request/reply. 122 */ 123 if (sotype != SOCK_DGRAM) { 124 error = nfs_sndlock(rep->r_nmp, rep); 125 if (error) 126 return (error); 127 tryagain: 128 /* 129 * Check for fatal errors and resending request. 130 */ 131 /* 132 * Ugh: If a reconnect attempt just happened, nm_so 133 * would have changed. NULL indicates a failed 134 * attempt that has essentially shut down this 135 * mount point. 
136 */ 137 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 138 nfs_sndunlock(rep->r_nmp); 139 return (EINTR); 140 } 141 so = rep->r_nmp->nm_so; 142 if (!so) { 143 error = nfs_reconnect(rep); 144 if (error) { 145 nfs_sndunlock(rep->r_nmp); 146 return (error); 147 } 148 goto tryagain; 149 } 150 while (rep->r_flags & R_MUSTRESEND) { 151 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 152 nfsstats.rpcretries++; 153 rep->r_rtt = 0; 154 rep->r_flags &= ~R_TIMING; 155 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l); 156 if (error) { 157 if (error == EINTR || error == ERESTART || 158 (error = nfs_reconnect(rep)) != 0) { 159 nfs_sndunlock(rep->r_nmp); 160 return (error); 161 } 162 goto tryagain; 163 } 164 } 165 nfs_sndunlock(rep->r_nmp); 166 if (sotype == SOCK_STREAM) { 167 aio.iov_base = (void *) &len; 168 aio.iov_len = sizeof(u_int32_t); 169 auio.uio_iov = &aio; 170 auio.uio_iovcnt = 1; 171 auio.uio_rw = UIO_READ; 172 auio.uio_offset = 0; 173 auio.uio_resid = sizeof(u_int32_t); 174 UIO_SETUP_SYSSPACE(&auio); 175 do { 176 rcvflg = MSG_WAITALL; 177 error = (*so->so_receive)(so, NULL, &auio, 178 NULL, NULL, &rcvflg); 179 if (error == EWOULDBLOCK && rep) { 180 if (rep->r_flags & R_SOFTTERM) 181 return (EINTR); 182 /* 183 * if it seems that the server died after it 184 * received our request, set EPIPE so that 185 * we'll reconnect and retransmit requests. 186 */ 187 if (rep->r_rexmit >= rep->r_nmp->nm_retry) { 188 nfsstats.rpctimeouts++; 189 error = EPIPE; 190 } 191 } 192 } while (error == EWOULDBLOCK); 193 if (!error && auio.uio_resid > 0) { 194 /* 195 * Don't log a 0 byte receive; it means 196 * that the socket has been closed, and 197 * can happen during normal operation 198 * (forcible unmount or Solaris server). 
199 */ 200 if (auio.uio_resid != sizeof (u_int32_t)) 201 log(LOG_INFO, 202 "short receive (%lu/%lu) from nfs server %s\n", 203 (u_long)sizeof(u_int32_t) - auio.uio_resid, 204 (u_long)sizeof(u_int32_t), 205 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 206 error = EPIPE; 207 } 208 if (error) 209 goto errout; 210 len = ntohl(len) & ~0x80000000; 211 /* 212 * This is SERIOUS! We are out of sync with the sender 213 * and forcing a disconnect/reconnect is all I can do. 214 */ 215 if (len > NFS_MAXPACKET) { 216 log(LOG_ERR, "%s (%d) from nfs server %s\n", 217 "impossible packet length", 218 len, 219 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 220 error = EFBIG; 221 goto errout; 222 } 223 auio.uio_resid = len; 224 do { 225 rcvflg = MSG_WAITALL; 226 error = (*so->so_receive)(so, NULL, 227 &auio, mp, NULL, &rcvflg); 228 } while (error == EWOULDBLOCK || error == EINTR || 229 error == ERESTART); 230 if (!error && auio.uio_resid > 0) { 231 if (len != auio.uio_resid) 232 log(LOG_INFO, 233 "short receive (%lu/%d) from nfs server %s\n", 234 (u_long)len - auio.uio_resid, len, 235 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 236 error = EPIPE; 237 } 238 } else { 239 /* 240 * NB: Since uio_resid is big, MSG_WAITALL is ignored 241 * and soreceive() will return when it has either a 242 * control msg or a data msg. 243 * We have no use for control msg., but must grab them 244 * and then throw them away so we know what is going 245 * on. 
246 */ 247 auio.uio_resid = len = 100000000; /* Anything Big */ 248 /* not need to setup uio_vmspace */ 249 do { 250 rcvflg = 0; 251 error = (*so->so_receive)(so, NULL, 252 &auio, mp, &control, &rcvflg); 253 m_freem(control); 254 if (error == EWOULDBLOCK && rep) { 255 if (rep->r_flags & R_SOFTTERM) 256 return (EINTR); 257 } 258 } while (error == EWOULDBLOCK || 259 (!error && *mp == NULL && control)); 260 if ((rcvflg & MSG_EOR) == 0) 261 printf("Egad!!\n"); 262 if (!error && *mp == NULL) 263 error = EPIPE; 264 len -= auio.uio_resid; 265 } 266 errout: 267 if (error && error != EINTR && error != ERESTART) { 268 m_freem(*mp); 269 *mp = NULL; 270 if (error != EPIPE) 271 log(LOG_INFO, 272 "receive error %d from nfs server %s\n", 273 error, 274 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 275 error = nfs_sndlock(rep->r_nmp, rep); 276 if (!error) 277 error = nfs_reconnect(rep); 278 if (!error) 279 goto tryagain; 280 else 281 nfs_sndunlock(rep->r_nmp); 282 } 283 } else { 284 if ((so = rep->r_nmp->nm_so) == NULL) 285 return (EACCES); 286 if (so->so_state & SS_ISCONNECTED) 287 getnam = NULL; 288 else 289 getnam = aname; 290 auio.uio_resid = len = 1000000; 291 /* not need to setup uio_vmspace */ 292 do { 293 rcvflg = 0; 294 error = (*so->so_receive)(so, getnam, &auio, mp, 295 NULL, &rcvflg); 296 if (error == EWOULDBLOCK) { 297 int intr = nfs_sigintr(rep->r_nmp, rep, l); 298 if (intr) 299 error = intr; 300 } 301 } while (error == EWOULDBLOCK); 302 len -= auio.uio_resid; 303 if (!error && *mp == NULL) 304 error = EPIPE; 305 } 306 if (error) { 307 m_freem(*mp); 308 *mp = NULL; 309 } 310 return (error); 311 } 312 313 /* 314 * Implement receipt of reply on a socket. 315 * We must search through the list of received datagrams matching them 316 * with outstanding requests using the xid, until ours is found. 
 */
/*
 * nfs_reply:
 *
 *	myrep	the request whose reply the caller is waiting for.
 *	lwp	lwp handed to nfs_receive().
 *
 * Returns 0 once myrep->r_mrep has been filled in here, or when
 * nfs_rcvlock() reports EALREADY (presumably meaning another receiver has
 * already delivered our reply -- confirm against nfs_rcvlock()).  Otherwise
 * returns the error from nfs_rcvlock() or nfs_receive().
 *
 * NOTE(review): t1 and cp2 have no visible use besides the rtt arithmetic
 * on t1; they are presumably referenced by the nfsm_dissect() macro, as is
 * the nfsmout label -- confirm against nfsm_subs.h.
 */
/* ARGSUSED */
static int
nfs_reply(struct nfsreq *myrep, struct lwp *lwp)
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	char *dpos, *cp2;
	int error, s;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(nmp, myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */

		/*
		 * Count ourselves as a waiter for the duration of the
		 * receive; nm_disconcv is signalled when we are done.
		 */
		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters++;
		mutex_exit(&nmp->nm_lock);

		error = nfs_receive(myrep, &nam, &mrep, lwp);

		mutex_enter(&nmp->nm_lock);
		nmp->nm_waiters--;
		cv_signal(&nmp->nm_disconcv);
		mutex_exit(&nmp->nm_lock);

		if (error) {
			nfs_rcvunlock(nmp);

			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				return error;
			}
			/*
			 * Ignore routing errors on connectionless protocols? ?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				/*
				 * NOTE(review): nm_so is dereferenced without
				 * a NULL check here -- confirm it cannot be
				 * NULL on this path.
				 */
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				if (ratecheck(&nfs_reply_last_err_time,
				    &nfs_err_interval))
					printf("%s: ignoring error %d\n",
					    __func__, error);
#endif
				continue;
			}
			return (error);
		}
		/* The sender address is not needed. */
		m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, void *);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			/* Drop the receive lock and go read the next message. */
			nfs_rcvunlock(nmp);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		s = splsoftnet();
		mutex_enter(&nfs_reqq_lock);
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			/* Skip requests already answered or with another xid. */
			if (rep->r_mrep != NULL || rxid != rep->r_xid)
				continue;

			/* Found it.. */
			rep->r_mrep = mrep;
			rep->r_md = md;
			rep->r_dpos = dpos;
			if (nfsrtton) {
				/* Record a sample in the nfsrtt ring log. */
				struct rttl *rt;
				int proct = nfs_proct[rep->r_procnum];

				rt = &nfsrtt.rttl[nfsrtt.pos];
				rt->proc = rep->r_procnum;
				rt->rto = NFS_RTO(nmp, proct);
				rt->sent = nmp->nm_sent;
				rt->cwnd = nmp->nm_cwnd;
				/*
				 * NOTE(review): indexes with proct - 1;
				 * confirm proct cannot be 0 here.
				 */
				rt->srtt = nmp->nm_srtt[proct - 1];
				rt->sdrtt = nmp->nm_sdrtt[proct - 1];
				rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
				getmicrotime(&rt->tstamp);
				if (rep->r_flags & R_TIMING)
					rt->rtt = rep->r_rtt;
				else
					rt->rtt = 1000000;
				nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
			}
			/*
			 * Update congestion window.
			 * Do the additive increase of
			 * one rpc/rtt.
			 */
			if (nmp->nm_cwnd <= nmp->nm_sent) {
				nmp->nm_cwnd +=
				   (NFS_CWNDSCALE * NFS_CWNDSCALE +
				   (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
				if (nmp->nm_cwnd > NFS_MAXCWND)
					nmp->nm_cwnd = NFS_MAXCWND;
			}
			rep->r_flags &= ~R_SENT;
			nmp->nm_sent -= NFS_CWNDSCALE;
			/*
			 * Update rtt using a gain of 0.125 on the mean
			 * and a gain of 0.25 on the deviation.
			 */
			if (rep->r_flags & R_TIMING) {
				/*
				 * Since the timer resolution of
				 * NFS_HZ is so coarse, it can often
				 * result in r_rtt == 0. Since
				 * r_rtt == N means that the actual
				 * rtt is between N+dt and N+2-dt ticks,
				 * add 1.
				 */
				t1 = rep->r_rtt + 1;
				t1 -= (NFS_SRTT(rep) >> 3);
				NFS_SRTT(rep) += t1;
				if (t1 < 0)
					t1 = -t1;
				t1 -= (NFS_SDRTT(rep) >> 2);
				NFS_SDRTT(rep) += t1;
			}
			nmp->nm_timeouts = 0;
			break;
		}
		mutex_exit(&nfs_reqq_lock);
		splx(s);
		nfs_rcvunlock(nmp);
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 * (A reply matched to someone else's request stays attached
		 * to that request and we simply loop again.)
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 *
 *	np	nfsnode the call is for; supplies the mount, the file's
 *		owner uid/gid and the NUSEOPENCRED hint.
 *	mrest	mbuf chain with the procedure arguments.
 *	procnum	NFS procedure number.
 *	lwp	calling lwp (NULL or curlwp).
 *	cred	caller's credential.
 *	mrp, mdp, dposp
 *		out: reply mbuf chain and the current dissect position
 *		within it.
 *	rexmitp	out (optional): set non-zero if the request was
 *		retransmitted.
 *
 * Returns 0 with mrp, mdp and dposp set on success, otherwise an errno.
 * On an NFSv3 mount an NFS status error is returned or'ed with
 * NFSERR_RETERR and mrp, mdp and dposp are set as well.
 */
int
nfs_request(struct nfsnode *np, struct mbuf *mrest, int procnum,
    struct lwp *lwp, kauth_cred_t cred, struct mbuf **mrp, struct mbuf **mdp,
    char **dposp, int *rexmitp)
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount);
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t waituntil;
	char *dpos, *cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	kauth_cred_t acred;
	struct mbuf *mrest_backup = NULL;
	kauth_cred_t origcred = NULL; /* XXX: gcc */
	bool retry_cred = true;
	bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0;

	if (rexmitp != NULL)
		*rexmitp = 0;

	/*
	 * Scratch credential; for READ/WRITE/COMMIT it may be filled in
	 * with the file owner's ids below.  Freed on every exit path.
	 */
	acred = kauth_cred_alloc();

	/*
	 * Re-entry point for the NFSERR_ACCES retry with the other
	 * credential: a fresh request structure is allocated each time.
	 */
tryagain_cred:
	KASSERT(cred != NULL);
	rep = kmem_alloc(sizeof(*rep), KM_SLEEP);
	rep->r_nmp = nmp;
	KASSERT(lwp == NULL || lwp == curlwp);
	rep->r_lwp = lwp;
	rep->r_procnum = procnum;
	/* Total length of the argument chain. */
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 * (Also the re-entry point after a Kerberos auth error, with
	 * failed_auth set.)
	 */
kerbauth:
	verf_str = auth_str = NULL;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((void *)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				kmem_free(rep, sizeof(*rep));
				m_freem(mrest);
				KASSERT(kauth_cred_getrefcnt(acred) == 1);
				kauth_cred_free(acred);
				return (error);
			}
		}
		retry_cred = false;
	} else {
		/* AUTH_UNIX */
		uid_t uid;
		gid_t gid;

		/*
		 * on most unix filesystems, permission checks are
		 * done when the file is open(2)'ed.
		 * ie. once a file is successfully open'ed,
		 * following i/o operations never fail with EACCES.
		 * we try to follow the semantics as far as possible.
		 *
		 * note that we expect that the nfs server always grant
		 * accesses by the file's owner.
		 */
		origcred = cred;
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			uid = np->n_vattr->va_uid;
			gid = np->n_vattr->va_gid;
			/* Caller already is the owner: nothing to swap. */
			if (kauth_cred_geteuid(cred) == uid &&
			    kauth_cred_getegid(cred) == gid) {
				retry_cred = false;
				break;
			}
			/* Use the caller's credential as-is. */
			if (use_opencred)
				break;
			/* Otherwise send the request as the file's owner. */
			kauth_cred_setuid(acred, uid);
			kauth_cred_seteuid(acred, uid);
			kauth_cred_setsvuid(acred, uid);
			kauth_cred_setgid(acred, gid);
			kauth_cred_setegid(acred, gid);
			kauth_cred_setsvgid(acred, gid);
			cred = acred;
			break;
		default:
			retry_cred = false;
			break;
		}
		/*
		 * backup mbuf chain if we can need it later to retry.
		 *
		 * XXX maybe we can keep a direct reference to
		 * mrest without doing m_copym, but it's ...ugly.
		 */
		if (retry_cred)
			mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT);
		auth_type = RPCAUTH_UNIX;
		/* XXX elad - ngroups */
		auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
			 (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
	/*
	 * Re-entry point for NFSERR_TRYLATER: the same request mbufs are
	 * sent again after the per-attempt state has been reset.
	 */
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (nfs_proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	mutex_enter(&nfs_reqq_lock);
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
	mutex_exit(&nfs_reqq_lock);
	nfs_timer_start();

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) || nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(nmp, rep);
		if (!error) {
			/* Send a copy; r_mreq is kept for retransmission. */
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(nmp);
		}
		s = splsoftnet();
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			if ((rep->r_flags & R_SENT) == 0) {
				nmp->nm_sent += NFS_CWNDSCALE;
				rep->r_flags |= R_SENT;
			}
		}
		splx(s);
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE || error == EWOULDBLOCK)
		error = nfs_reply(rep, lwp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	mutex_enter(&nfs_reqq_lock);
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	mutex_exit(&nfs_reqq_lock);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}
	splx(s);

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error)
		goto nfsmout;

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				/*
				 * First auth failure: rebuild the header
				 * once.  mheadend was filled in by
				 * nfsm_rpchead(); presumably it is the last
				 * header mbuf, so clearing m_next detaches
				 * mrest before r_mreq is freed -- confirm
				 * against nfsm_rpchead().
				 */
				failed_auth++;
				mheadend->m_next = NULL;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		goto nfsmout;
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			/* Map the NFS status onto a local errno. */
			error = fxdr_unsigned(int, *tl);
			switch (error) {
			case NFSERR_PERM:
				error = EPERM;
				break;

			case NFSERR_NOENT:
				error = ENOENT;
				break;

			case NFSERR_IO:
				error = EIO;
				break;

			case NFSERR_NXIO:
				error = ENXIO;
				break;

			case NFSERR_ACCES:
				error = EACCES;
				if (!retry_cred)
					break;
				/*
				 * Retry once with the other credential
				 * choice, using the saved copy of the
				 * arguments and a new request structure.
				 */
				m_freem(mrep);
				m_freem(rep->r_mreq);
				kmem_free(rep, sizeof(*rep));
				use_opencred = !use_opencred;
				if (mrest_backup == NULL) {
					/* m_copym failure */
					KASSERT(
					    kauth_cred_getrefcnt(acred) == 1);
					kauth_cred_free(acred);
					return ENOMEM;
				}
				mrest = mrest_backup;
				mrest_backup = NULL;
				cred = origcred;
				error = 0;
				retry_cred = false;
				goto tryagain_cred;

			case NFSERR_EXIST:
				error = EEXIST;
				break;

			case NFSERR_XDEV:
				error = EXDEV;
				break;

			case NFSERR_NODEV:
				error = ENODEV;
				break;

			case NFSERR_NOTDIR:
				error = ENOTDIR;
				break;

			case NFSERR_ISDIR:
				error = EISDIR;
				break;

			case NFSERR_INVAL:
				error = EINVAL;
				break;

			case NFSERR_FBIG:
				error = EFBIG;
				break;

			case NFSERR_NOSPC:
				error = ENOSPC;
				break;

			case NFSERR_ROFS:
				error = EROFS;
				break;

			case NFSERR_MLINK:
				error = EMLINK;
				break;

			case NFSERR_TIMEDOUT:
				error = ETIMEDOUT;
				break;

			case NFSERR_NAMETOL:
				error = ENAMETOOLONG;
				break;

			case NFSERR_NOTEMPTY:
				error = ENOTEMPTY;
				break;

			case NFSERR_DQUOT:
				error = EDQUOT;
				break;

			case NFSERR_STALE:
				/*
				 * If the File Handle was stale, invalidate the
				 * lookup cache, just in case.
				 */
				error = ESTALE;
				cache_purge(NFSTOV(np));
				break;

			case NFSERR_REMOTE:
				error = EREMOTE;
				break;

			case NFSERR_WFLUSH:
			case NFSERR_BADHANDLE:
			case NFSERR_NOT_SYNC:
			case NFSERR_BAD_COOKIE:
				error = EINVAL;
				break;

			case NFSERR_NOTSUPP:
				error = ENOTSUP;
				break;

			case NFSERR_TOOSMALL:
			case NFSERR_SERVERFAULT:
			case NFSERR_BADTYPE:
				error = EINVAL;
				break;

			case NFSERR_TRYLATER:
				/* Only NFSv3 mounts wait and retry. */
				if ((nmp->nm_flag & NFSMNT_NFSV3) == 0)
					break;
				m_freem(mrep);
				error = 0;
				/*
				 * Sleep in one second steps until the delay
				 * has elapsed; the delay grows on every
				 * TRYLATER up to NFS_TRYLATERDELMAX.
				 */
				waituntil = time_second + trylater_delay;
				while (time_second < waituntil) {
					kpause("nfstrylater", false, hz, NULL);
				}
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;

			default:
#ifdef DIAGNOSTIC
				printf("Invalid rpc error code %d\n", error);
#endif
				error = EINVAL;
				break;
			}

			/*
			 * NFSv3 callers get the reply as well, with the
			 * error flagged by NFSERR_RETERR.
			 */
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			goto nfsmout;
		}

		/*
		 * note which credential worked to minimize number of retries.
		 */
		if (use_opencred)
			np->n_flag |= NUSEOPENCRED;
		else
			np->n_flag &= ~NUSEOPENCRED;

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;

		KASSERT(error == 0);
		goto nfsmout;
	}
	/* Any other non-zero accept status is reported as unsupported. */
	m_freem(mrep);
	error = EPROTONOSUPPORT;
	/*
	 * Common exit: release the scratch credential, the request mbufs,
	 * the request structure and any unused argument backup.
	 */
nfsmout:
	KASSERT(kauth_cred_getrefcnt(acred) == 1);
	kauth_cred_free(acred);
	m_freem(rep->r_mreq);
	kmem_free(rep, sizeof(*rep));
	m_freem(mrest_backup);
	return (error);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
971 */ 972 static int 973 nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep) 974 { 975 struct lwp *l; 976 int timeo = 0; 977 bool catch_p = false; 978 int error = 0; 979 980 if (nmp->nm_flag & NFSMNT_SOFT) 981 timeo = nmp->nm_retry * nmp->nm_timeo; 982 983 if (nmp->nm_iflag & NFSMNT_DISMNTFORCE) 984 timeo = hz; 985 986 if (rep) { 987 l = rep->r_lwp; 988 if (rep->r_nmp->nm_flag & NFSMNT_INT) 989 catch_p = true; 990 } else 991 l = NULL; 992 mutex_enter(&nmp->nm_lock); 993 while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) { 994 if (rep && nfs_sigintr(rep->r_nmp, rep, l)) { 995 error = EINTR; 996 goto quit; 997 } 998 if (catch_p) { 999 error = cv_timedwait_sig(&nmp->nm_sndcv, 1000 &nmp->nm_lock, timeo); 1001 } else { 1002 error = cv_timedwait(&nmp->nm_sndcv, 1003 &nmp->nm_lock, timeo); 1004 } 1005 1006 if (error) { 1007 if ((error == EWOULDBLOCK) && 1008 (nmp->nm_flag & NFSMNT_SOFT)) { 1009 error = EIO; 1010 goto quit; 1011 } 1012 error = 0; 1013 } 1014 if (catch_p) { 1015 catch_p = false; 1016 timeo = 2 * hz; 1017 } 1018 } 1019 nmp->nm_iflag |= NFSMNT_SNDLOCK; 1020 quit: 1021 mutex_exit(&nmp->nm_lock); 1022 return error; 1023 } 1024 1025 /* 1026 * Unlock the stream socket for others. 1027 */ 1028 static void 1029 nfs_sndunlock(struct nfsmount *nmp) 1030 { 1031 1032 mutex_enter(&nmp->nm_lock); 1033 if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0) 1034 panic("nfs sndunlock"); 1035 nmp->nm_iflag &= ~NFSMNT_SNDLOCK; 1036 cv_signal(&nmp->nm_sndcv); 1037 mutex_exit(&nmp->nm_lock); 1038 } 1039