1 /* $NetBSD: nfs_socket.c,v 1.164 2007/10/21 08:23:19 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1991, 1993, 1995 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95 35 */ 36 37 /* 38 * Socket operations for use by nfs 39 */ 40 41 #include <sys/cdefs.h> 42 __KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.164 2007/10/21 08:23:19 yamt Exp $"); 43 44 #include "fs_nfs.h" 45 #include "opt_nfs.h" 46 #include "opt_nfsserver.h" 47 #include "opt_mbuftrace.h" 48 #include "opt_inet.h" 49 50 #include <sys/param.h> 51 #include <sys/systm.h> 52 #include <sys/evcnt.h> 53 #include <sys/callout.h> 54 #include <sys/proc.h> 55 #include <sys/mount.h> 56 #include <sys/kernel.h> 57 #include <sys/mbuf.h> 58 #include <sys/vnode.h> 59 #include <sys/domain.h> 60 #include <sys/protosw.h> 61 #include <sys/socket.h> 62 #include <sys/socketvar.h> 63 #include <sys/syslog.h> 64 #include <sys/tprintf.h> 65 #include <sys/namei.h> 66 #include <sys/signal.h> 67 #include <sys/signalvar.h> 68 #include <sys/kauth.h> 69 70 #include <netinet/in.h> 71 #include <netinet/tcp.h> 72 73 #include <nfs/rpcv2.h> 74 #include <nfs/nfsproto.h> 75 #include <nfs/nfs.h> 76 #include <nfs/xdr_subs.h> 77 #include <nfs/nfsm_subs.h> 78 #include <nfs/nfsmount.h> 79 #include <nfs/nfsnode.h> 80 #include <nfs/nfsrtt.h> 81 #include <nfs/nfs_var.h> 82 83 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header"); 84 #ifdef MBUFTRACE 85 struct mowner nfs_mowner = MOWNER_INIT("nfs",""); 86 #endif 87 88 /* 89 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 90 * Use the mean and mean deviation of rtt for the appropriate type of rpc 91 * for the frequent rpcs and a default for the others. 92 * The justification for doing "other" this way is that these rpcs 93 * happen so infrequently that timer est. would probably be stale. 94 * Also, since many of these rpcs are 95 * non-idempotent, a conservative timeout is desired. 96 * getattr, lookup - A+2D 97 * read, write - A+4D 98 * other - nm_timeo 99 */ 100 #define NFS_RTO(n, t) \ 101 ((t) == 0 ? (n)->nm_timeo : \ 102 ((t) < 3 ? \ 103 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 104 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 105 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 106 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 107 /* 108 * External data, mostly RPC constants in XDR form 109 */ 110 extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, 111 rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr, 112 rpc_auth_kerb; 113 extern u_int32_t nfs_prog; 114 extern const int nfsv3_procid[NFS_NPROCS]; 115 extern int nfs_ticks; 116 117 /* 118 * Defines which timer to use for the procnum. 119 * 0 - default 120 * 1 - getattr 121 * 2 - lookup 122 * 3 - read 123 * 4 - write 124 */ 125 static const int proct[NFS_NPROCS] = { 126 [NFSPROC_NULL] = 0, 127 [NFSPROC_GETATTR] = 1, 128 [NFSPROC_SETATTR] = 0, 129 [NFSPROC_LOOKUP] = 2, 130 [NFSPROC_ACCESS] = 1, 131 [NFSPROC_READLINK] = 3, 132 [NFSPROC_READ] = 3, 133 [NFSPROC_WRITE] = 4, 134 [NFSPROC_CREATE] = 0, 135 [NFSPROC_MKDIR] = 0, 136 [NFSPROC_SYMLINK] = 0, 137 [NFSPROC_MKNOD] = 0, 138 [NFSPROC_REMOVE] = 0, 139 [NFSPROC_RMDIR] = 0, 140 [NFSPROC_RENAME] = 0, 141 [NFSPROC_LINK] = 0, 142 [NFSPROC_READDIR] = 3, 143 [NFSPROC_READDIRPLUS] = 3, 144 [NFSPROC_FSSTAT] = 0, 145 [NFSPROC_FSINFO] = 0, 146 [NFSPROC_PATHCONF] = 0, 147 [NFSPROC_COMMIT] = 0, 148 [NFSPROC_NOOP] = 0, 149 }; 150 151 /* 152 * There is a congestion window for outstanding rpcs maintained per mount 153 * point. The cwnd size is adjusted in roughly the way that: 154 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 155 * SIGCOMM '88". ACM, August 1988. 156 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 157 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 158 * of rpcs is in progress. 159 * (The sent count and cwnd are scaled for integer arith.) 160 * Variants of "slow start" were tried and were found to be too much of a 161 * performance hit (ave. rtt 3 times larger), 162 * I suspect due to the large rtt that nfs rpcs have. 163 */ 164 #define NFS_CWNDSCALE 256 165 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) 166 static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 167 int nfsrtton = 0; 168 struct nfsrtt nfsrtt; 169 struct nfsreqhead nfs_reqq; 170 static callout_t nfs_timer_ch; 171 static struct evcnt nfs_timer_ev; 172 static struct evcnt nfs_timer_start_ev; 173 static struct evcnt nfs_timer_stop_ev; 174 175 static int nfs_sndlock(struct nfsmount *, struct nfsreq *); 176 static void nfs_sndunlock(struct nfsmount *); 177 static int nfs_rcvlock(struct nfsmount *, struct nfsreq *); 178 static void nfs_rcvunlock(struct nfsmount *); 179 180 /* 181 * Initialize sockets and congestion for a new NFS connection. 182 * We do not free the sockaddr if error. 183 */ 184 int 185 nfs_connect(nmp, rep, l) 186 struct nfsmount *nmp; 187 struct nfsreq *rep; 188 struct lwp *l; 189 { 190 struct socket *so; 191 int s, error, rcvreserve, sndreserve; 192 struct sockaddr *saddr; 193 struct sockaddr_in *sin; 194 #ifdef INET6 195 struct sockaddr_in6 *sin6; 196 #endif 197 struct mbuf *m; 198 199 nmp->nm_so = (struct socket *)0; 200 saddr = mtod(nmp->nm_nam, struct sockaddr *); 201 error = socreate(saddr->sa_family, &nmp->nm_so, 202 nmp->nm_sotype, nmp->nm_soproto, l); 203 if (error) 204 goto bad; 205 so = nmp->nm_so; 206 #ifdef MBUFTRACE 207 so->so_mowner = &nfs_mowner; 208 so->so_rcv.sb_mowner = &nfs_mowner; 209 so->so_snd.sb_mowner = &nfs_mowner; 210 #endif 211 nmp->nm_soflags = so->so_proto->pr_flags; 212 213 /* 214 * Some servers require that the client port be a reserved port number. 215 */ 216 if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) { 217 m = m_get(M_WAIT, MT_SOOPTS); 218 MCLAIM(m, so->so_mowner); 219 *mtod(m, int32_t *) = IP_PORTRANGE_LOW; 220 m->m_len = sizeof(int32_t); 221 if ((error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, m))) 222 goto bad; 223 m = m_get(M_WAIT, MT_SONAME); 224 MCLAIM(m, so->so_mowner); 225 sin = mtod(m, struct sockaddr_in *); 226 sin->sin_len = m->m_len = sizeof (struct sockaddr_in); 227 sin->sin_family = AF_INET; 228 sin->sin_addr.s_addr = INADDR_ANY; 229 sin->sin_port = 0; 230 error = sobind(so, m, &lwp0); 231 m_freem(m); 232 if (error) 233 goto bad; 234 } 235 #ifdef INET6 236 if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) { 237 m = m_get(M_WAIT, MT_SOOPTS); 238 MCLAIM(m, so->so_mowner); 239 *mtod(m, int32_t *) = IPV6_PORTRANGE_LOW; 240 m->m_len = sizeof(int32_t); 241 if ((error = sosetopt(so, IPPROTO_IPV6, IPV6_PORTRANGE, m))) 242 goto bad; 243 m = m_get(M_WAIT, MT_SONAME); 244 MCLAIM(m, so->so_mowner); 245 sin6 = mtod(m, struct sockaddr_in6 *); 246 sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6); 247 sin6->sin6_family = AF_INET6; 248 sin6->sin6_addr = in6addr_any; 249 sin6->sin6_port = 0; 250 error = sobind(so, m, &lwp0); 251 m_freem(m); 252 if (error) 253 goto bad; 254 } 255 #endif 256 257 /* 258 * Protocols that do not require connections may be optionally left 259 * unconnected for servers that reply from a port other than NFS_PORT. 260 */ 261 if (nmp->nm_flag & NFSMNT_NOCONN) { 262 if (nmp->nm_soflags & PR_CONNREQUIRED) { 263 error = ENOTCONN; 264 goto bad; 265 } 266 } else { 267 error = soconnect(so, nmp->nm_nam, l); 268 if (error) 269 goto bad; 270 271 /* 272 * Wait for the connection to complete. Cribbed from the 273 * connect system call but with the wait timing out so 274 * that interruptible mounts don't hang here for a long time. 275 */ 276 s = splsoftnet(); 277 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 278 (void) tsleep((void *)&so->so_timeo, PSOCK, 279 "nfscn1", 2 * hz); 280 if ((so->so_state & SS_ISCONNECTING) && 281 so->so_error == 0 && rep && 282 (error = nfs_sigintr(nmp, rep, rep->r_lwp)) != 0){ 283 so->so_state &= ~SS_ISCONNECTING; 284 splx(s); 285 goto bad; 286 } 287 } 288 if (so->so_error) { 289 error = so->so_error; 290 so->so_error = 0; 291 splx(s); 292 goto bad; 293 } 294 splx(s); 295 } 296 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 297 so->so_rcv.sb_timeo = (5 * hz); 298 so->so_snd.sb_timeo = (5 * hz); 299 } else { 300 /* 301 * enable receive timeout to detect server crash and reconnect. 302 * otherwise, we can be stuck in soreceive forever. 303 */ 304 so->so_rcv.sb_timeo = (5 * hz); 305 so->so_snd.sb_timeo = 0; 306 } 307 if (nmp->nm_sotype == SOCK_DGRAM) { 308 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 309 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 310 NFS_MAXPKTHDR) * 2; 311 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 312 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 313 rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) + 314 NFS_MAXPKTHDR) * 2; 315 } else { 316 if (nmp->nm_sotype != SOCK_STREAM) 317 panic("nfscon sotype"); 318 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 319 m = m_get(M_WAIT, MT_SOOPTS); 320 MCLAIM(m, so->so_mowner); 321 *mtod(m, int32_t *) = 1; 322 m->m_len = sizeof(int32_t); 323 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 324 } 325 if (so->so_proto->pr_protocol == IPPROTO_TCP) { 326 m = m_get(M_WAIT, MT_SOOPTS); 327 MCLAIM(m, so->so_mowner); 328 *mtod(m, int32_t *) = 1; 329 m->m_len = sizeof(int32_t); 330 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 331 } 332 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + 333 sizeof (u_int32_t)) * 2; 334 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + 335 sizeof (u_int32_t)) * 2; 336 } 337 error = soreserve(so, sndreserve, rcvreserve); 338 if (error) 339 goto bad; 340 so->so_rcv.sb_flags |= SB_NOINTR; 341 so->so_snd.sb_flags |= SB_NOINTR; 342 343 /* Initialize other non-zero congestion variables */ 344 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 345 NFS_TIMEO << 3; 346 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 347 nmp->nm_sdrtt[3] = 0; 348 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 349 nmp->nm_sent = 0; 350 nmp->nm_timeouts = 0; 351 return (0); 352 353 bad: 354 nfs_disconnect(nmp); 355 return (error); 356 } 357 358 /* 359 * Reconnect routine: 360 * Called when a connection is broken on a reliable protocol. 361 * - clean up the old socket 362 * - nfs_connect() again 363 * - set R_MUSTRESEND for all outstanding requests on mount point 364 * If this fails the mount point is DEAD! 365 * nb: Must be called with the nfs_sndlock() set on the mount point. 366 */ 367 int 368 nfs_reconnect(struct nfsreq *rep) 369 { 370 struct nfsreq *rp; 371 struct nfsmount *nmp = rep->r_nmp; 372 int error; 373 374 nfs_disconnect(nmp); 375 while ((error = nfs_connect(nmp, rep, &lwp0)) != 0) { 376 if (error == EINTR || error == ERESTART) 377 return (EINTR); 378 kpause("nfscn2", false, hz, NULL); 379 } 380 381 /* 382 * Loop through outstanding request list and fix up all requests 383 * on old socket. 384 */ 385 TAILQ_FOREACH(rp, &nfs_reqq, r_chain) { 386 if (rp->r_nmp == nmp) { 387 if ((rp->r_flags & R_MUSTRESEND) == 0) 388 rp->r_flags |= R_MUSTRESEND | R_REXMITTED; 389 rp->r_rexmit = 0; 390 } 391 } 392 return (0); 393 } 394 395 /* 396 * NFS disconnect. Clean up and unlink. 397 */ 398 void 399 nfs_disconnect(nmp) 400 struct nfsmount *nmp; 401 { 402 struct socket *so; 403 int drain = 0; 404 405 if (nmp->nm_so) { 406 so = nmp->nm_so; 407 nmp->nm_so = (struct socket *)0; 408 soshutdown(so, SHUT_RDWR); 409 drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0; 410 if (drain) { 411 /* 412 * soshutdown() above should wake up the current 413 * listener. 414 * Now wake up those waiting for the receive lock, and 415 * wait for them to go away unhappy, to prevent *nmp 416 * from evaporating while they're sleeping. 417 */ 418 mutex_enter(&nmp->nm_lock); 419 while (nmp->nm_waiters > 0) { 420 cv_broadcast(&nmp->nm_rcvcv); 421 cv_broadcast(&nmp->nm_sndcv); 422 cv_wait(&nmp->nm_disconcv, &nmp->nm_lock); 423 } 424 mutex_exit(&nmp->nm_lock); 425 } 426 soclose(so); 427 } 428 #ifdef DIAGNOSTIC 429 if (drain && (nmp->nm_waiters > 0)) 430 panic("nfs_disconnect: waiters left after drain?"); 431 #endif 432 } 433 434 void 435 nfs_safedisconnect(nmp) 436 struct nfsmount *nmp; 437 { 438 struct nfsreq dummyreq; 439 440 memset(&dummyreq, 0, sizeof(dummyreq)); 441 dummyreq.r_nmp = nmp; 442 nfs_rcvlock(nmp, &dummyreq); /* XXX ignored error return */ 443 nfs_disconnect(nmp); 444 nfs_rcvunlock(nmp); 445 } 446 447 /* 448 * This is the nfs send routine. For connection based socket types, it 449 * must be called with an nfs_sndlock() on the socket. 450 * "rep == NULL" indicates that it has been called from a server. 451 * For the client side: 452 * - return EINTR if the RPC is terminated, 0 otherwise 453 * - set R_MUSTRESEND if the send fails for any reason 454 * - do any cleanup required by recoverable socket errors (? ? ?) 455 * For the server side: 456 * - return EINTR or ERESTART if interrupted by a signal 457 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 458 * - do any cleanup required by recoverable socket errors (? ? ?) 459 */ 460 int 461 nfs_send(so, nam, top, rep, l) 462 struct socket *so; 463 struct mbuf *nam; 464 struct mbuf *top; 465 struct nfsreq *rep; 466 struct lwp *l; 467 { 468 struct mbuf *sendnam; 469 int error, soflags, flags; 470 471 /* XXX nfs_doio()/nfs_request() calls with rep->r_lwp == NULL */ 472 if (l == NULL && rep->r_lwp == NULL) 473 l = curlwp; 474 475 if (rep) { 476 if (rep->r_flags & R_SOFTTERM) { 477 m_freem(top); 478 return (EINTR); 479 } 480 if ((so = rep->r_nmp->nm_so) == NULL) { 481 rep->r_flags |= R_MUSTRESEND; 482 m_freem(top); 483 return (0); 484 } 485 rep->r_flags &= ~R_MUSTRESEND; 486 soflags = rep->r_nmp->nm_soflags; 487 } else 488 soflags = so->so_proto->pr_flags; 489 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 490 sendnam = (struct mbuf *)0; 491 else 492 sendnam = nam; 493 if (so->so_type == SOCK_SEQPACKET) 494 flags = MSG_EOR; 495 else 496 flags = 0; 497 498 error = (*so->so_send)(so, sendnam, (struct uio *)0, top, 499 (struct mbuf *)0, flags, l); 500 if (error) { 501 if (rep) { 502 if (error == ENOBUFS && so->so_type == SOCK_DGRAM) { 503 /* 504 * We're too fast for the network/driver, 505 * and UDP isn't flowcontrolled. 506 * We need to resend. This is not fatal, 507 * just try again. 508 * 509 * Could be smarter here by doing some sort 510 * of a backoff, but this is rare. 511 */ 512 rep->r_flags |= R_MUSTRESEND; 513 } else { 514 if (error != EPIPE) 515 log(LOG_INFO, 516 "nfs send error %d for %s\n", 517 error, 518 rep->r_nmp->nm_mountp-> 519 mnt_stat.f_mntfromname); 520 /* 521 * Deal with errors for the client side. 522 */ 523 if (rep->r_flags & R_SOFTTERM) 524 error = EINTR; 525 else 526 rep->r_flags |= R_MUSTRESEND; 527 } 528 } else { 529 /* 530 * See above. This error can happen under normal 531 * circumstances and the log is too noisy. 532 * The error will still show up in nfsstat. 533 */ 534 if (error != ENOBUFS || so->so_type != SOCK_DGRAM) 535 log(LOG_INFO, "nfsd send error %d\n", error); 536 } 537 538 /* 539 * Handle any recoverable (soft) socket errors here. (? ? ?) 540 */ 541 if (error != EINTR && error != ERESTART && 542 error != EWOULDBLOCK && error != EPIPE) 543 error = 0; 544 } 545 return (error); 546 } 547 548 #ifdef NFS 549 /* 550 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 551 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 552 * Mark and consolidate the data into a new mbuf list. 553 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 554 * small mbufs. 555 * For SOCK_STREAM we must be very careful to read an entire record once 556 * we have read any of it, even if the system call has been interrupted. 557 */ 558 int 559 nfs_receive(rep, aname, mp, l) 560 struct nfsreq *rep; 561 struct mbuf **aname; 562 struct mbuf **mp; 563 struct lwp *l; 564 { 565 struct socket *so; 566 struct uio auio; 567 struct iovec aio; 568 struct mbuf *m; 569 struct mbuf *control; 570 u_int32_t len; 571 struct mbuf **getnam; 572 int error, sotype, rcvflg; 573 574 /* 575 * Set up arguments for soreceive() 576 */ 577 *mp = (struct mbuf *)0; 578 *aname = (struct mbuf *)0; 579 sotype = rep->r_nmp->nm_sotype; 580 581 /* 582 * For reliable protocols, lock against other senders/receivers 583 * in case a reconnect is necessary. 584 * For SOCK_STREAM, first get the Record Mark to find out how much 585 * more there is to get. 586 * We must lock the socket against other receivers 587 * until we have an entire rpc request/reply. 588 */ 589 if (sotype != SOCK_DGRAM) { 590 error = nfs_sndlock(rep->r_nmp, rep); 591 if (error) 592 return (error); 593 tryagain: 594 /* 595 * Check for fatal errors and resending request. 596 */ 597 /* 598 * Ugh: If a reconnect attempt just happened, nm_so 599 * would have changed. NULL indicates a failed 600 * attempt that has essentially shut down this 601 * mount point. 602 */ 603 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 604 nfs_sndunlock(rep->r_nmp); 605 return (EINTR); 606 } 607 so = rep->r_nmp->nm_so; 608 if (!so) { 609 error = nfs_reconnect(rep); 610 if (error) { 611 nfs_sndunlock(rep->r_nmp); 612 return (error); 613 } 614 goto tryagain; 615 } 616 while (rep->r_flags & R_MUSTRESEND) { 617 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 618 nfsstats.rpcretries++; 619 rep->r_rtt = 0; 620 rep->r_flags &= ~R_TIMING; 621 error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, l); 622 if (error) { 623 if (error == EINTR || error == ERESTART || 624 (error = nfs_reconnect(rep)) != 0) { 625 nfs_sndunlock(rep->r_nmp); 626 return (error); 627 } 628 goto tryagain; 629 } 630 } 631 nfs_sndunlock(rep->r_nmp); 632 if (sotype == SOCK_STREAM) { 633 aio.iov_base = (void *) &len; 634 aio.iov_len = sizeof(u_int32_t); 635 auio.uio_iov = &aio; 636 auio.uio_iovcnt = 1; 637 auio.uio_rw = UIO_READ; 638 auio.uio_offset = 0; 639 auio.uio_resid = sizeof(u_int32_t); 640 UIO_SETUP_SYSSPACE(&auio); 641 do { 642 rcvflg = MSG_WAITALL; 643 error = (*so->so_receive)(so, (struct mbuf **)0, &auio, 644 (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 645 if (error == EWOULDBLOCK && rep) { 646 if (rep->r_flags & R_SOFTTERM) 647 return (EINTR); 648 /* 649 * if it seems that the server died after it 650 * received our request, set EPIPE so that 651 * we'll reconnect and retransmit requests. 652 */ 653 if (rep->r_rexmit >= rep->r_nmp->nm_retry) { 654 nfsstats.rpctimeouts++; 655 error = EPIPE; 656 } 657 } 658 } while (error == EWOULDBLOCK); 659 if (!error && auio.uio_resid > 0) { 660 /* 661 * Don't log a 0 byte receive; it means 662 * that the socket has been closed, and 663 * can happen during normal operation 664 * (forcible unmount or Solaris server). 665 */ 666 if (auio.uio_resid != sizeof (u_int32_t)) 667 log(LOG_INFO, 668 "short receive (%lu/%lu) from nfs server %s\n", 669 (u_long)sizeof(u_int32_t) - auio.uio_resid, 670 (u_long)sizeof(u_int32_t), 671 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 672 error = EPIPE; 673 } 674 if (error) 675 goto errout; 676 len = ntohl(len) & ~0x80000000; 677 /* 678 * This is SERIOUS! We are out of sync with the sender 679 * and forcing a disconnect/reconnect is all I can do. 680 */ 681 if (len > NFS_MAXPACKET) { 682 log(LOG_ERR, "%s (%d) from nfs server %s\n", 683 "impossible packet length", 684 len, 685 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 686 error = EFBIG; 687 goto errout; 688 } 689 auio.uio_resid = len; 690 do { 691 rcvflg = MSG_WAITALL; 692 error = (*so->so_receive)(so, (struct mbuf **)0, 693 &auio, mp, (struct mbuf **)0, &rcvflg); 694 } while (error == EWOULDBLOCK || error == EINTR || 695 error == ERESTART); 696 if (!error && auio.uio_resid > 0) { 697 if (len != auio.uio_resid) 698 log(LOG_INFO, 699 "short receive (%lu/%d) from nfs server %s\n", 700 (u_long)len - auio.uio_resid, len, 701 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 702 error = EPIPE; 703 } 704 } else { 705 /* 706 * NB: Since uio_resid is big, MSG_WAITALL is ignored 707 * and soreceive() will return when it has either a 708 * control msg or a data msg. 709 * We have no use for control msg., but must grab them 710 * and then throw them away so we know what is going 711 * on. 712 */ 713 auio.uio_resid = len = 100000000; /* Anything Big */ 714 /* not need to setup uio_vmspace */ 715 do { 716 rcvflg = 0; 717 error = (*so->so_receive)(so, (struct mbuf **)0, 718 &auio, mp, &control, &rcvflg); 719 if (control) 720 m_freem(control); 721 if (error == EWOULDBLOCK && rep) { 722 if (rep->r_flags & R_SOFTTERM) 723 return (EINTR); 724 } 725 } while (error == EWOULDBLOCK || 726 (!error && *mp == NULL && control)); 727 if ((rcvflg & MSG_EOR) == 0) 728 printf("Egad!!\n"); 729 if (!error && *mp == NULL) 730 error = EPIPE; 731 len -= auio.uio_resid; 732 } 733 errout: 734 if (error && error != EINTR && error != ERESTART) { 735 m_freem(*mp); 736 *mp = (struct mbuf *)0; 737 if (error != EPIPE) 738 log(LOG_INFO, 739 "receive error %d from nfs server %s\n", 740 error, 741 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 742 error = nfs_sndlock(rep->r_nmp, rep); 743 if (!error) 744 error = nfs_reconnect(rep); 745 if (!error) 746 goto tryagain; 747 else 748 nfs_sndunlock(rep->r_nmp); 749 } 750 } else { 751 if ((so = rep->r_nmp->nm_so) == NULL) 752 return (EACCES); 753 if (so->so_state & SS_ISCONNECTED) 754 getnam = (struct mbuf **)0; 755 else 756 getnam = aname; 757 auio.uio_resid = len = 1000000; 758 /* not need to setup uio_vmspace */ 759 do { 760 rcvflg = 0; 761 error = (*so->so_receive)(so, getnam, &auio, mp, 762 (struct mbuf **)0, &rcvflg); 763 if (error == EWOULDBLOCK && 764 (rep->r_flags & R_SOFTTERM)) 765 return (EINTR); 766 } while (error == EWOULDBLOCK); 767 len -= auio.uio_resid; 768 if (!error && *mp == NULL) 769 error = EPIPE; 770 } 771 if (error) { 772 m_freem(*mp); 773 *mp = (struct mbuf *)0; 774 } 775 return (error); 776 } 777 778 /* 779 * Implement receipt of reply on a socket. 780 * We must search through the list of received datagrams matching them 781 * with outstanding requests using the xid, until ours is found. 782 */ 783 /* ARGSUSED */ 784 int 785 nfs_reply(myrep, lwp) 786 struct nfsreq *myrep; 787 struct lwp *lwp; 788 { 789 struct nfsreq *rep; 790 struct nfsmount *nmp = myrep->r_nmp; 791 int32_t t1; 792 struct mbuf *mrep, *nam, *md; 793 u_int32_t rxid, *tl; 794 char *dpos, *cp2; 795 int error; 796 797 /* 798 * Loop around until we get our own reply 799 */ 800 for (;;) { 801 /* 802 * Lock against other receivers so that I don't get stuck in 803 * sbwait() after someone else has received my reply for me. 804 * Also necessary for connection based protocols to avoid 805 * race conditions during a reconnect. 806 */ 807 error = nfs_rcvlock(nmp, myrep); 808 if (error == EALREADY) 809 return (0); 810 if (error) 811 return (error); 812 /* 813 * Get the next Rpc reply off the socket 814 */ 815 816 mutex_enter(&nmp->nm_lock); 817 nmp->nm_waiters++; 818 mutex_exit(&nmp->nm_lock); 819 820 error = nfs_receive(myrep, &nam, &mrep, lwp); 821 822 mutex_enter(&nmp->nm_lock); 823 nmp->nm_waiters--; 824 cv_signal(&nmp->nm_disconcv); 825 mutex_exit(&nmp->nm_lock); 826 827 if (error) { 828 nfs_rcvunlock(nmp); 829 830 if (nmp->nm_iflag & NFSMNT_DISMNT) { 831 /* 832 * Oops, we're going away now.. 833 */ 834 return error; 835 } 836 /* 837 * Ignore routing errors on connectionless protocols? ? 838 */ 839 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 840 nmp->nm_so->so_error = 0; 841 #ifdef DEBUG 842 printf("nfs_reply: ignoring error %d\n", error); 843 #endif 844 continue; 845 } 846 return (error); 847 } 848 if (nam) 849 m_freem(nam); 850 851 /* 852 * Get the xid and check that it is an rpc reply 853 */ 854 md = mrep; 855 dpos = mtod(md, void *); 856 nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED); 857 rxid = *tl++; 858 if (*tl != rpc_reply) { 859 nfsstats.rpcinvalid++; 860 m_freem(mrep); 861 nfsmout: 862 nfs_rcvunlock(nmp); 863 continue; 864 } 865 866 /* 867 * Loop through the request list to match up the reply 868 * Iff no match, just drop the datagram 869 */ 870 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { 871 if (rep->r_mrep == NULL && rxid == rep->r_xid) { 872 /* Found it.. */ 873 rep->r_mrep = mrep; 874 rep->r_md = md; 875 rep->r_dpos = dpos; 876 if (nfsrtton) { 877 struct rttl *rt; 878 879 rt = &nfsrtt.rttl[nfsrtt.pos]; 880 rt->proc = rep->r_procnum; 881 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 882 rt->sent = nmp->nm_sent; 883 rt->cwnd = nmp->nm_cwnd; 884 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 885 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 886 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx; 887 getmicrotime(&rt->tstamp); 888 if (rep->r_flags & R_TIMING) 889 rt->rtt = rep->r_rtt; 890 else 891 rt->rtt = 1000000; 892 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 893 } 894 /* 895 * Update congestion window. 896 * Do the additive increase of 897 * one rpc/rtt. 898 */ 899 if (nmp->nm_cwnd <= nmp->nm_sent) { 900 nmp->nm_cwnd += 901 (NFS_CWNDSCALE * NFS_CWNDSCALE + 902 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 903 if (nmp->nm_cwnd > NFS_MAXCWND) 904 nmp->nm_cwnd = NFS_MAXCWND; 905 } 906 rep->r_flags &= ~R_SENT; 907 nmp->nm_sent -= NFS_CWNDSCALE; 908 /* 909 * Update rtt using a gain of 0.125 on the mean 910 * and a gain of 0.25 on the deviation. 911 */ 912 if (rep->r_flags & R_TIMING) { 913 /* 914 * Since the timer resolution of 915 * NFS_HZ is so course, it can often 916 * result in r_rtt == 0. Since 917 * r_rtt == N means that the actual 918 * rtt is between N+dt and N+2-dt ticks, 919 * add 1. 920 */ 921 t1 = rep->r_rtt + 1; 922 t1 -= (NFS_SRTT(rep) >> 3); 923 NFS_SRTT(rep) += t1; 924 if (t1 < 0) 925 t1 = -t1; 926 t1 -= (NFS_SDRTT(rep) >> 2); 927 NFS_SDRTT(rep) += t1; 928 } 929 nmp->nm_timeouts = 0; 930 break; 931 } 932 } 933 nfs_rcvunlock(nmp); 934 /* 935 * If not matched to a request, drop it. 936 * If it's mine, get out. 937 */ 938 if (rep == 0) { 939 nfsstats.rpcunexpected++; 940 m_freem(mrep); 941 } else if (rep == myrep) { 942 if (rep->r_mrep == NULL) 943 panic("nfsreply nil"); 944 return (0); 945 } 946 } 947 } 948 949 /* 950 * nfs_request - goes something like this 951 * - fill in request struct 952 * - links it into list 953 * - calls nfs_send() for first transmit 954 * - calls nfs_receive() to get reply 955 * - break down rpc header and return with nfs reply pointed to 956 * by mrep or error 957 * nb: always frees up mreq mbuf list 958 */ 959 int 960 nfs_request(np, mrest, procnum, lwp, cred, mrp, mdp, dposp, rexmitp) 961 struct nfsnode *np; 962 struct mbuf *mrest; 963 int procnum; 964 struct lwp *lwp; 965 kauth_cred_t cred; 966 struct mbuf **mrp; 967 struct mbuf **mdp; 968 char **dposp; 969 int *rexmitp; 970 { 971 struct mbuf *m, *mrep; 972 struct nfsreq *rep; 973 u_int32_t *tl; 974 int i; 975 struct nfsmount *nmp = VFSTONFS(np->n_vnode->v_mount); 976 struct mbuf *md, *mheadend; 977 char nickv[RPCX_NICKVERF]; 978 time_t waituntil; 979 char *dpos, *cp2; 980 int t1, s, error = 0, mrest_len, auth_len, auth_type; 981 int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0; 982 int verf_len, verf_type; 983 u_int32_t xid; 984 char *auth_str, *verf_str; 985 NFSKERBKEY_T key; /* save session key */ 986 kauth_cred_t acred; 987 struct mbuf *mrest_backup = NULL; 988 kauth_cred_t origcred = NULL; /* XXX: gcc */ 989 bool retry_cred = true; 990 bool use_opencred = (np->n_flag & NUSEOPENCRED) != 0; 991 992 if (rexmitp != NULL) 993 *rexmitp = 0; 994 995 acred = kauth_cred_alloc(); 996 997 tryagain_cred: 998 KASSERT(cred != NULL); 999 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 1000 rep->r_nmp = nmp; 1001 KASSERT(lwp == NULL || lwp == curlwp); 1002 rep->r_lwp = lwp; 1003 rep->r_procnum = procnum; 1004 i = 0; 1005 m = mrest; 1006 while (m) { 1007 i += m->m_len; 1008 m = m->m_next; 1009 } 1010 mrest_len = i; 1011 1012 /* 1013 * Get the RPC header with authorization. 1014 */ 1015 kerbauth: 1016 verf_str = auth_str = (char *)0; 1017 if (nmp->nm_flag & NFSMNT_KERB) { 1018 verf_str = nickv; 1019 verf_len = sizeof (nickv); 1020 auth_type = RPCAUTH_KERB4; 1021 memset((void *)key, 0, sizeof (key)); 1022 if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str, 1023 &auth_len, verf_str, verf_len)) { 1024 error = nfs_getauth(nmp, rep, cred, &auth_str, 1025 &auth_len, verf_str, &verf_len, key); 1026 if (error) { 1027 free((void *)rep, M_NFSREQ); 1028 m_freem(mrest); 1029 KASSERT(kauth_cred_getrefcnt(acred) == 1); 1030 kauth_cred_free(acred); 1031 return (error); 1032 } 1033 } 1034 retry_cred = false; 1035 } else { 1036 /* AUTH_UNIX */ 1037 uid_t uid; 1038 gid_t gid; 1039 1040 /* 1041 * on the most unix filesystems, permission checks are 1042 * done when the file is open(2)'ed. 1043 * ie. once a file is successfully open'ed, 1044 * following i/o operations never fail with EACCES. 1045 * we try to follow the semantics as far as possible. 1046 * 1047 * note that we expect that the nfs server always grant 1048 * accesses by the file's owner. 1049 */ 1050 origcred = cred; 1051 switch (procnum) { 1052 case NFSPROC_READ: 1053 case NFSPROC_WRITE: 1054 case NFSPROC_COMMIT: 1055 uid = np->n_vattr->va_uid; 1056 gid = np->n_vattr->va_gid; 1057 if (kauth_cred_geteuid(cred) == uid && 1058 kauth_cred_getegid(cred) == gid) { 1059 retry_cred = false; 1060 break; 1061 } 1062 if (use_opencred) 1063 break; 1064 kauth_cred_setuid(acred, uid); 1065 kauth_cred_seteuid(acred, uid); 1066 kauth_cred_setsvuid(acred, uid); 1067 kauth_cred_setgid(acred, gid); 1068 kauth_cred_setegid(acred, gid); 1069 kauth_cred_setsvgid(acred, gid); 1070 cred = acred; 1071 break; 1072 default: 1073 retry_cred = false; 1074 break; 1075 } 1076 /* 1077 * backup mbuf chain if we can need it later to retry. 1078 * 1079 * XXX maybe we can keep a direct reference to 1080 * mrest without doing m_copym, but it's ...ugly. 1081 */ 1082 if (retry_cred) 1083 mrest_backup = m_copym(mrest, 0, M_COPYALL, M_WAIT); 1084 auth_type = RPCAUTH_UNIX; 1085 /* XXX elad - ngroups */ 1086 auth_len = (((kauth_cred_ngroups(cred) > nmp->nm_numgrps) ? 1087 nmp->nm_numgrps : kauth_cred_ngroups(cred)) << 2) + 1088 5 * NFSX_UNSIGNED; 1089 } 1090 m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, 1091 auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); 1092 if (auth_str) 1093 free(auth_str, M_TEMP); 1094 1095 /* 1096 * For stream protocols, insert a Sun RPC Record Mark. 1097 */ 1098 if (nmp->nm_sotype == SOCK_STREAM) { 1099 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 1100 *mtod(m, u_int32_t *) = htonl(0x80000000 | 1101 (m->m_pkthdr.len - NFSX_UNSIGNED)); 1102 } 1103 rep->r_mreq = m; 1104 rep->r_xid = xid; 1105 tryagain: 1106 if (nmp->nm_flag & NFSMNT_SOFT) 1107 rep->r_retry = nmp->nm_retry; 1108 else 1109 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 1110 rep->r_rtt = rep->r_rexmit = 0; 1111 if (proct[procnum] > 0) 1112 rep->r_flags = R_TIMING; 1113 else 1114 rep->r_flags = 0; 1115 rep->r_mrep = NULL; 1116 1117 /* 1118 * Do the client side RPC. 1119 */ 1120 nfsstats.rpcrequests++; 1121 /* 1122 * Chain request into list of outstanding requests. Be sure 1123 * to put it LAST so timer finds oldest requests first. 1124 */ 1125 s = splsoftnet(); 1126 TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain); 1127 nfs_timer_start(); 1128 1129 /* 1130 * If backing off another request or avoiding congestion, don't 1131 * send this one now but let timer do it. If not timing a request, 1132 * do it now. 1133 */ 1134 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 1135 (nmp->nm_flag & NFSMNT_DUMBTIMR) || 1136 nmp->nm_sent < nmp->nm_cwnd)) { 1137 splx(s); 1138 if (nmp->nm_soflags & PR_CONNREQUIRED) 1139 error = nfs_sndlock(nmp, rep); 1140 if (!error) { 1141 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 1142 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, lwp); 1143 if (nmp->nm_soflags & PR_CONNREQUIRED) 1144 nfs_sndunlock(nmp); 1145 } 1146 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 1147 nmp->nm_sent += NFS_CWNDSCALE; 1148 rep->r_flags |= R_SENT; 1149 } 1150 } else { 1151 splx(s); 1152 rep->r_rtt = -1; 1153 } 1154 1155 /* 1156 * Wait for the reply from our send or the timer's. 1157 */ 1158 if (!error || error == EPIPE) 1159 error = nfs_reply(rep, lwp); 1160 1161 /* 1162 * RPC done, unlink the request. 1163 */ 1164 s = splsoftnet(); 1165 TAILQ_REMOVE(&nfs_reqq, rep, r_chain); 1166 splx(s); 1167 1168 /* 1169 * Decrement the outstanding request count. 1170 */ 1171 if (rep->r_flags & R_SENT) { 1172 rep->r_flags &= ~R_SENT; /* paranoia */ 1173 nmp->nm_sent -= NFS_CWNDSCALE; 1174 } 1175 1176 if (rexmitp != NULL) { 1177 int rexmit; 1178 1179 if (nmp->nm_sotype != SOCK_DGRAM) 1180 rexmit = (rep->r_flags & R_REXMITTED) != 0; 1181 else 1182 rexmit = rep->r_rexmit; 1183 *rexmitp = rexmit; 1184 } 1185 1186 /* 1187 * If there was a successful reply and a tprintf msg. 1188 * tprintf a response. 1189 */ 1190 if (!error && (rep->r_flags & R_TPRINTFMSG)) 1191 nfs_msg(rep->r_lwp, nmp->nm_mountp->mnt_stat.f_mntfromname, 1192 "is alive again"); 1193 mrep = rep->r_mrep; 1194 md = rep->r_md; 1195 dpos = rep->r_dpos; 1196 if (error) 1197 goto nfsmout; 1198 1199 /* 1200 * break down the rpc header and check if ok 1201 */ 1202 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1203 if (*tl++ == rpc_msgdenied) { 1204 if (*tl == rpc_mismatch) 1205 error = EOPNOTSUPP; 1206 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 1207 if (!failed_auth) { 1208 failed_auth++; 1209 mheadend->m_next = (struct mbuf *)0; 1210 m_freem(mrep); 1211 m_freem(rep->r_mreq); 1212 goto kerbauth; 1213 } else 1214 error = EAUTH; 1215 } else 1216 error = EACCES; 1217 m_freem(mrep); 1218 goto nfsmout; 1219 } 1220 1221 /* 1222 * Grab any Kerberos verifier, otherwise just throw it away. 1223 */ 1224 verf_type = fxdr_unsigned(int, *tl++); 1225 i = fxdr_unsigned(int32_t, *tl); 1226 if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) { 1227 error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep); 1228 if (error) 1229 goto nfsmout; 1230 } else if (i > 0) 1231 nfsm_adv(nfsm_rndup(i)); 1232 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); 1233 /* 0 == ok */ 1234 if (*tl == 0) { 1235 nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); 1236 if (*tl != 0) { 1237 error = fxdr_unsigned(int, *tl); 1238 switch (error) { 1239 case NFSERR_PERM: 1240 error = EPERM; 1241 break; 1242 1243 case NFSERR_NOENT: 1244 error = ENOENT; 1245 break; 1246 1247 case NFSERR_IO: 1248 error = EIO; 1249 break; 1250 1251 case NFSERR_NXIO: 1252 error = ENXIO; 1253 break; 1254 1255 case NFSERR_ACCES: 1256 error = EACCES; 1257 if (!retry_cred) 1258 break; 1259 m_freem(mrep); 1260 m_freem(rep->r_mreq); 1261 FREE(rep, M_NFSREQ); 1262 use_opencred = !use_opencred; 1263 if (mrest_backup == NULL) { 1264 /* m_copym failure */ 1265 KASSERT( 1266 kauth_cred_getrefcnt(acred) == 1); 1267 kauth_cred_free(acred); 1268 return ENOMEM; 1269 } 1270 mrest = mrest_backup; 1271 mrest_backup = NULL; 1272 cred = origcred; 1273 error = 0; 1274 retry_cred = false; 1275 goto tryagain_cred; 1276 1277 case NFSERR_EXIST: 1278 error = EEXIST; 1279 break; 1280 1281 case NFSERR_XDEV: 1282 error = EXDEV; 1283 break; 1284 1285 case NFSERR_NODEV: 1286 error = ENODEV; 1287 break; 1288 1289 case NFSERR_NOTDIR: 1290 error = ENOTDIR; 1291 break; 1292 1293 case NFSERR_ISDIR: 1294 error = EISDIR; 1295 break; 1296 1297 case NFSERR_INVAL: 1298 error = EINVAL; 1299 break; 1300 1301 case NFSERR_FBIG: 1302 error = EFBIG; 1303 break; 1304 1305 case NFSERR_NOSPC: 1306 error = ENOSPC; 1307 break; 1308 1309 case NFSERR_ROFS: 1310 error = EROFS; 1311 break; 1312 1313 case NFSERR_MLINK: 1314 error = EMLINK; 1315 break; 1316 1317 case NFSERR_TIMEDOUT: 1318 error = ETIMEDOUT; 1319 break; 1320 1321 case NFSERR_NAMETOL: 1322 error = ENAMETOOLONG; 1323 break; 1324 1325 case NFSERR_NOTEMPTY: 1326 error = ENOTEMPTY; 1327 break; 1328 1329 case NFSERR_DQUOT: 1330 error = EDQUOT; 1331 break; 1332 1333 case NFSERR_STALE: 1334 /* 1335 * If the File Handle was stale, invalidate the 1336 * lookup cache, just in case. 1337 */ 1338 error = ESTALE; 1339 cache_purge(NFSTOV(np)); 1340 break; 1341 1342 case NFSERR_REMOTE: 1343 error = EREMOTE; 1344 break; 1345 1346 case NFSERR_WFLUSH: 1347 case NFSERR_BADHANDLE: 1348 case NFSERR_NOT_SYNC: 1349 case NFSERR_BAD_COOKIE: 1350 error = EINVAL; 1351 break; 1352 1353 case NFSERR_NOTSUPP: 1354 error = ENOTSUP; 1355 break; 1356 1357 case NFSERR_TOOSMALL: 1358 case NFSERR_SERVERFAULT: 1359 case NFSERR_BADTYPE: 1360 error = EINVAL; 1361 break; 1362 1363 case NFSERR_TRYLATER: 1364 if ((nmp->nm_flag & NFSMNT_NFSV3) == 0) 1365 break; 1366 m_freem(mrep); 1367 error = 0; 1368 waituntil = time_second + trylater_delay; 1369 while (time_second < waituntil) { 1370 kpause("nfstrylater", false, hz, NULL); 1371 } 1372 trylater_delay *= NFS_TRYLATERDELMUL; 1373 if (trylater_delay > NFS_TRYLATERDELMAX) 1374 trylater_delay = NFS_TRYLATERDELMAX; 1375 /* 1376 * RFC1813: 1377 * The client should wait and then try 1378 * the request with a new RPC transaction ID. 1379 */ 1380 nfs_renewxid(rep); 1381 goto tryagain; 1382 1383 default: 1384 #ifdef DIAGNOSTIC 1385 printf("Invalid rpc error code %d\n", error); 1386 #endif 1387 error = EINVAL; 1388 break; 1389 } 1390 1391 if (nmp->nm_flag & NFSMNT_NFSV3) { 1392 *mrp = mrep; 1393 *mdp = md; 1394 *dposp = dpos; 1395 error |= NFSERR_RETERR; 1396 } else 1397 m_freem(mrep); 1398 goto nfsmout; 1399 } 1400 1401 /* 1402 * note which credential worked to minimize number of retries. 1403 */ 1404 if (use_opencred) 1405 np->n_flag |= NUSEOPENCRED; 1406 else 1407 np->n_flag &= ~NUSEOPENCRED; 1408 1409 *mrp = mrep; 1410 *mdp = md; 1411 *dposp = dpos; 1412 1413 KASSERT(error == 0); 1414 goto nfsmout; 1415 } 1416 m_freem(mrep); 1417 error = EPROTONOSUPPORT; 1418 nfsmout: 1419 KASSERT(kauth_cred_getrefcnt(acred) == 1); 1420 kauth_cred_free(acred); 1421 m_freem(rep->r_mreq); 1422 free((void *)rep, M_NFSREQ); 1423 m_freem(mrest_backup); 1424 return (error); 1425 } 1426 #endif /* NFS */ 1427 1428 /* 1429 * Generate the rpc reply header 1430 * siz arg. is used to decide if adding a cluster is worthwhile 1431 */ 1432 int 1433 nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp) 1434 int siz; 1435 struct nfsrv_descript *nd; 1436 struct nfssvc_sock *slp; 1437 int err; 1438 int cache; 1439 u_quad_t *frev; 1440 struct mbuf **mrq; 1441 struct mbuf **mbp; 1442 char **bposp; 1443 { 1444 u_int32_t *tl; 1445 struct mbuf *mreq; 1446 char *bpos; 1447 struct mbuf *mb; 1448 1449 mreq = m_gethdr(M_WAIT, MT_DATA); 1450 MCLAIM(mreq, &nfs_mowner); 1451 mb = mreq; 1452 /* 1453 * If this is a big reply, use a cluster else 1454 * try and leave leading space for the lower level headers. 1455 */ 1456 siz += RPC_REPLYSIZ; 1457 if (siz >= max_datalen) { 1458 m_clget(mreq, M_WAIT); 1459 } else 1460 mreq->m_data += max_hdr; 1461 tl = mtod(mreq, u_int32_t *); 1462 mreq->m_len = 6 * NFSX_UNSIGNED; 1463 bpos = ((char *)tl) + mreq->m_len; 1464 *tl++ = txdr_unsigned(nd->nd_retxid); 1465 *tl++ = rpc_reply; 1466 if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) { 1467 *tl++ = rpc_msgdenied; 1468 if (err & NFSERR_AUTHERR) { 1469 *tl++ = rpc_autherr; 1470 *tl = txdr_unsigned(err & ~NFSERR_AUTHERR); 1471 mreq->m_len -= NFSX_UNSIGNED; 1472 bpos -= NFSX_UNSIGNED; 1473 } else { 1474 *tl++ = rpc_mismatch; 1475 *tl++ = txdr_unsigned(RPC_VER2); 1476 *tl = txdr_unsigned(RPC_VER2); 1477 } 1478 } else { 1479 *tl++ = rpc_msgaccepted; 1480 1481 /* 1482 * For Kerberos authentication, we must send the nickname 1483 * verifier back, otherwise just RPCAUTH_NULL. 1484 */ 1485 if (nd->nd_flag & ND_KERBFULL) { 1486 struct nfsuid *nuidp; 1487 struct timeval ktvin, ktvout; 1488 1489 memset(&ktvout, 0, sizeof ktvout); /* XXX gcc */ 1490 1491 LIST_FOREACH(nuidp, 1492 NUIDHASH(slp, kauth_cred_geteuid(nd->nd_cr)), 1493 nu_hash) { 1494 if (kauth_cred_geteuid(nuidp->nu_cr) == 1495 kauth_cred_geteuid(nd->nd_cr) && 1496 (!nd->nd_nam2 || netaddr_match( 1497 NU_NETFAM(nuidp), &nuidp->nu_haddr, 1498 nd->nd_nam2))) 1499 break; 1500 } 1501 if (nuidp) { 1502 ktvin.tv_sec = 1503 txdr_unsigned(nuidp->nu_timestamp.tv_sec 1504 - 1); 1505 ktvin.tv_usec = 1506 txdr_unsigned(nuidp->nu_timestamp.tv_usec); 1507 1508 /* 1509 * Encrypt the timestamp in ecb mode using the 1510 * session key. 1511 */ 1512 #ifdef NFSKERB 1513 XXX 1514 #endif 1515 1516 *tl++ = rpc_auth_kerb; 1517 *tl++ = txdr_unsigned(3 * NFSX_UNSIGNED); 1518 *tl = ktvout.tv_sec; 1519 nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1520 *tl++ = ktvout.tv_usec; 1521 *tl++ = txdr_unsigned( 1522 kauth_cred_geteuid(nuidp->nu_cr)); 1523 } else { 1524 *tl++ = 0; 1525 *tl++ = 0; 1526 } 1527 } else { 1528 *tl++ = 0; 1529 *tl++ = 0; 1530 } 1531 switch (err) { 1532 case EPROGUNAVAIL: 1533 *tl = txdr_unsigned(RPC_PROGUNAVAIL); 1534 break; 1535 case EPROGMISMATCH: 1536 *tl = txdr_unsigned(RPC_PROGMISMATCH); 1537 nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 1538 *tl++ = txdr_unsigned(2); 1539 *tl = txdr_unsigned(3); 1540 break; 1541 case EPROCUNAVAIL: 1542 *tl = txdr_unsigned(RPC_PROCUNAVAIL); 1543 break; 1544 case EBADRPC: 1545 *tl = txdr_unsigned(RPC_GARBAGE); 1546 break; 1547 default: 1548 *tl = 0; 1549 if (err != NFSERR_RETVOID) { 1550 nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); 1551 if (err) 1552 *tl = txdr_unsigned(nfsrv_errmap(nd, err)); 1553 else 1554 *tl = 0; 1555 } 1556 break; 1557 }; 1558 } 1559 1560 if (mrq != NULL) 1561 *mrq = mreq; 1562 *mbp = mb; 1563 *bposp = bpos; 1564 if (err != 0 && err != NFSERR_RETVOID) 1565 nfsstats.srvrpc_errs++; 1566 return (0); 1567 } 1568 1569 static void 1570 nfs_timer_schedule(void) 1571 { 1572 1573 callout_schedule(&nfs_timer_ch, nfs_ticks); 1574 } 1575 1576 void 1577 nfs_timer_start(void) 1578 { 1579 1580 if (callout_pending(&nfs_timer_ch)) 1581 return; 1582 1583 nfs_timer_start_ev.ev_count++; 1584 nfs_timer_schedule(); 1585 } 1586 1587 void 1588 nfs_timer_init(void) 1589 { 1590 1591 callout_init(&nfs_timer_ch, 0); 1592 callout_setfunc(&nfs_timer_ch, nfs_timer, NULL); 1593 evcnt_attach_dynamic(&nfs_timer_ev, EVCNT_TYPE_MISC, NULL, 1594 "nfs", "timer"); 1595 evcnt_attach_dynamic(&nfs_timer_start_ev, EVCNT_TYPE_MISC, NULL, 1596 "nfs", "timer start"); 1597 evcnt_attach_dynamic(&nfs_timer_stop_ev, EVCNT_TYPE_MISC, NULL, 1598 "nfs", "timer stop"); 1599 } 1600 1601 /* 1602 * Nfs timer routine 1603 * Scan the nfsreq list and retranmit any requests that have timed out 1604 * To avoid retransmission attempts on STREAM sockets (in the future) make 1605 * sure to set the r_retry field to 0 (implies nm_retry == 0). 1606 * A non-NULL argument means 'initialize'. 1607 */ 1608 void 1609 nfs_timer(void *arg) 1610 { 1611 struct nfsreq *rep; 1612 struct mbuf *m; 1613 struct socket *so; 1614 struct nfsmount *nmp; 1615 int timeo; 1616 int s, error; 1617 bool more = false; 1618 #ifdef NFSSERVER 1619 struct timeval tv; 1620 struct nfssvc_sock *slp; 1621 u_quad_t cur_usec; 1622 #endif 1623 1624 nfs_timer_ev.ev_count++; 1625 1626 s = splsoftnet(); 1627 TAILQ_FOREACH(rep, &nfs_reqq, r_chain) { 1628 more = true; 1629 nmp = rep->r_nmp; 1630 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 1631 continue; 1632 if (nfs_sigintr(nmp, rep, rep->r_lwp)) { 1633 rep->r_flags |= R_SOFTTERM; 1634 continue; 1635 } 1636 if (rep->r_rtt >= 0) { 1637 rep->r_rtt++; 1638 if (nmp->nm_flag & NFSMNT_DUMBTIMR) 1639 timeo = nmp->nm_timeo; 1640 else 1641 timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 1642 if (nmp->nm_timeouts > 0) 1643 timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 1644 if (rep->r_rtt <= timeo) 1645 continue; 1646 if (nmp->nm_timeouts < 1647 (sizeof(nfs_backoff) / sizeof(nfs_backoff[0]))) 1648 nmp->nm_timeouts++; 1649 } 1650 /* 1651 * Check for server not responding 1652 */ 1653 if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1654 rep->r_rexmit > nmp->nm_deadthresh) { 1655 nfs_msg(rep->r_lwp, 1656 nmp->nm_mountp->mnt_stat.f_mntfromname, 1657 "not responding"); 1658 rep->r_flags |= R_TPRINTFMSG; 1659 } 1660 if (rep->r_rexmit >= rep->r_retry) { /* too many */ 1661 nfsstats.rpctimeouts++; 1662 rep->r_flags |= R_SOFTTERM; 1663 continue; 1664 } 1665 if (nmp->nm_sotype != SOCK_DGRAM) { 1666 if (++rep->r_rexmit > NFS_MAXREXMIT) 1667 rep->r_rexmit = NFS_MAXREXMIT; 1668 continue; 1669 } 1670 if ((so = nmp->nm_so) == NULL) 1671 continue; 1672 1673 /* 1674 * If there is enough space and the window allows.. 1675 * Resend it 1676 * Set r_rtt to -1 in case we fail to send it now. 1677 */ 1678 rep->r_rtt = -1; 1679 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 1680 ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 1681 (rep->r_flags & R_SENT) || 1682 nmp->nm_sent < nmp->nm_cwnd) && 1683 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 1684 if (so->so_state & SS_ISCONNECTED) 1685 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1686 (struct mbuf *)0, (struct mbuf *)0, (struct lwp *)0); 1687 else 1688 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1689 nmp->nm_nam, (struct mbuf *)0, (struct lwp *)0); 1690 if (error) { 1691 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 1692 #ifdef DEBUG 1693 printf("nfs_timer: ignoring error %d\n", 1694 error); 1695 #endif 1696 so->so_error = 0; 1697 } 1698 } else { 1699 /* 1700 * Iff first send, start timing 1701 * else turn timing off, backoff timer 1702 * and divide congestion window by 2. 1703 */ 1704 if (rep->r_flags & R_SENT) { 1705 rep->r_flags &= ~R_TIMING; 1706 if (++rep->r_rexmit > NFS_MAXREXMIT) 1707 rep->r_rexmit = NFS_MAXREXMIT; 1708 nmp->nm_cwnd >>= 1; 1709 if (nmp->nm_cwnd < NFS_CWNDSCALE) 1710 nmp->nm_cwnd = NFS_CWNDSCALE; 1711 nfsstats.rpcretries++; 1712 } else { 1713 rep->r_flags |= R_SENT; 1714 nmp->nm_sent += NFS_CWNDSCALE; 1715 } 1716 rep->r_rtt = 0; 1717 } 1718 } 1719 } 1720 1721 #ifdef NFSSERVER 1722 /* 1723 * Scan the write gathering queues for writes that need to be 1724 * completed now. 1725 */ 1726 getmicrotime(&tv); 1727 cur_usec = (u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec; 1728 TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) { 1729 if (LIST_FIRST(&slp->ns_tq)) { 1730 if (LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec) { 1731 nfsrv_wakenfsd(slp); 1732 } 1733 more = true; 1734 } 1735 } 1736 #endif /* NFSSERVER */ 1737 splx(s); 1738 if (more) { 1739 nfs_timer_schedule(); 1740 } else { 1741 nfs_timer_stop_ev.ev_count++; 1742 } 1743 } 1744 1745 /* 1746 * Test for a termination condition pending on the process. 1747 * This is used for NFSMNT_INT mounts. 1748 */ 1749 int 1750 nfs_sigintr(nmp, rep, l) 1751 struct nfsmount *nmp; 1752 struct nfsreq *rep; 1753 struct lwp *l; 1754 { 1755 sigset_t ss; 1756 1757 if (rep && (rep->r_flags & R_SOFTTERM)) 1758 return (EINTR); 1759 if (!(nmp->nm_flag & NFSMNT_INT)) 1760 return (0); 1761 if (l) { 1762 sigpending1(l, &ss); 1763 #if 0 1764 sigminusset(&l->l_proc->p_sigctx.ps_sigignore, &ss); 1765 #endif 1766 if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) || 1767 sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) || 1768 sigismember(&ss, SIGQUIT)) 1769 return (EINTR); 1770 } 1771 return (0); 1772 } 1773 1774 /* 1775 * Lock a socket against others. 1776 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 1777 * and also to avoid race conditions between the processes with nfs requests 1778 * in progress when a reconnect is necessary. 1779 */ 1780 static int 1781 nfs_sndlock(struct nfsmount *nmp, struct nfsreq *rep) 1782 { 1783 struct lwp *l; 1784 int timeo = 0; 1785 bool catch = false; 1786 int error = 0; 1787 1788 if (rep) { 1789 l = rep->r_lwp; 1790 if (rep->r_nmp->nm_flag & NFSMNT_INT) 1791 catch = true; 1792 } else 1793 l = NULL; 1794 mutex_enter(&nmp->nm_lock); 1795 while ((nmp->nm_iflag & NFSMNT_SNDLOCK) != 0) { 1796 if (rep && nfs_sigintr(rep->r_nmp, rep, l)) { 1797 error = EINTR; 1798 goto quit; 1799 } 1800 if (catch) { 1801 cv_timedwait_sig(&nmp->nm_sndcv, &nmp->nm_lock, timeo); 1802 } else { 1803 cv_timedwait(&nmp->nm_sndcv, &nmp->nm_lock, timeo); 1804 } 1805 if (catch) { 1806 catch = false; 1807 timeo = 2 * hz; 1808 } 1809 } 1810 nmp->nm_iflag |= NFSMNT_SNDLOCK; 1811 quit: 1812 mutex_exit(&nmp->nm_lock); 1813 return error; 1814 } 1815 1816 /* 1817 * Unlock the stream socket for others. 1818 */ 1819 static void 1820 nfs_sndunlock(struct nfsmount *nmp) 1821 { 1822 1823 mutex_enter(&nmp->nm_lock); 1824 if ((nmp->nm_iflag & NFSMNT_SNDLOCK) == 0) 1825 panic("nfs sndunlock"); 1826 nmp->nm_iflag &= ~NFSMNT_SNDLOCK; 1827 cv_signal(&nmp->nm_sndcv); 1828 mutex_exit(&nmp->nm_lock); 1829 } 1830 1831 static int 1832 nfs_rcvlock(struct nfsmount *nmp, struct nfsreq *rep) 1833 { 1834 int *flagp = &nmp->nm_iflag; 1835 int slptimeo = 0; 1836 bool catch; 1837 int error = 0; 1838 1839 KASSERT(nmp == rep->r_nmp); 1840 1841 catch = (nmp->nm_flag & NFSMNT_INT) != 0; 1842 mutex_enter(&nmp->nm_lock); 1843 while (/* CONSTCOND */ true) { 1844 if (*flagp & NFSMNT_DISMNT) { 1845 cv_signal(&nmp->nm_disconcv); 1846 error = EIO; 1847 break; 1848 } 1849 /* If our reply was received while we were sleeping, 1850 * then just return without taking the lock to avoid a 1851 * situation where a single iod could 'capture' the 1852 * receive lock. 1853 */ 1854 if (rep->r_mrep != NULL) { 1855 error = EALREADY; 1856 break; 1857 } 1858 if (nfs_sigintr(rep->r_nmp, rep, rep->r_lwp)) { 1859 error = EINTR; 1860 break; 1861 } 1862 if ((*flagp & NFSMNT_RCVLOCK) == 0) { 1863 *flagp |= NFSMNT_RCVLOCK; 1864 break; 1865 } 1866 if (catch) { 1867 cv_timedwait_sig(&nmp->nm_rcvcv, &nmp->nm_lock, 1868 slptimeo); 1869 } else { 1870 cv_timedwait(&nmp->nm_rcvcv, &nmp->nm_lock, 1871 slptimeo); 1872 } 1873 if (catch) { 1874 catch = false; 1875 slptimeo = 2 * hz; 1876 } 1877 } 1878 mutex_exit(&nmp->nm_lock); 1879 return error; 1880 } 1881 1882 /* 1883 * Unlock the stream socket for others. 1884 */ 1885 static void 1886 nfs_rcvunlock(struct nfsmount *nmp) 1887 { 1888 1889 mutex_enter(&nmp->nm_lock); 1890 if ((nmp->nm_iflag & NFSMNT_RCVLOCK) == 0) 1891 panic("nfs rcvunlock"); 1892 nmp->nm_iflag &= ~NFSMNT_RCVLOCK; 1893 cv_broadcast(&nmp->nm_rcvcv); 1894 mutex_exit(&nmp->nm_lock); 1895 } 1896 1897 /* 1898 * Parse an RPC request 1899 * - verify it 1900 * - allocate and fill in the cred. 1901 */ 1902 int 1903 nfs_getreq(nd, nfsd, has_header) 1904 struct nfsrv_descript *nd; 1905 struct nfsd *nfsd; 1906 int has_header; 1907 { 1908 int len, i; 1909 u_int32_t *tl; 1910 int32_t t1; 1911 struct uio uio; 1912 struct iovec iov; 1913 char *dpos, *cp2, *cp; 1914 u_int32_t nfsvers, auth_type; 1915 uid_t nickuid; 1916 int error = 0, ticklen; 1917 struct mbuf *mrep, *md; 1918 struct nfsuid *nuidp; 1919 struct timeval tvin, tvout; 1920 1921 memset(&tvout, 0, sizeof tvout); /* XXX gcc */ 1922 1923 KASSERT(nd->nd_cr == NULL); 1924 mrep = nd->nd_mrep; 1925 md = nd->nd_md; 1926 dpos = nd->nd_dpos; 1927 if (has_header) { 1928 nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED); 1929 nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++); 1930 if (*tl++ != rpc_call) { 1931 m_freem(mrep); 1932 return (EBADRPC); 1933 } 1934 } else 1935 nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED); 1936 nd->nd_repstat = 0; 1937 nd->nd_flag = 0; 1938 if (*tl++ != rpc_vers) { 1939 nd->nd_repstat = ERPCMISMATCH; 1940 nd->nd_procnum = NFSPROC_NOOP; 1941 return (0); 1942 } 1943 if (*tl != nfs_prog) { 1944 nd->nd_repstat = EPROGUNAVAIL; 1945 nd->nd_procnum = NFSPROC_NOOP; 1946 return (0); 1947 } 1948 tl++; 1949 nfsvers = fxdr_unsigned(u_int32_t, *tl++); 1950 if (nfsvers < NFS_VER2 || nfsvers > NFS_VER3) { 1951 nd->nd_repstat = EPROGMISMATCH; 1952 nd->nd_procnum = NFSPROC_NOOP; 1953 return (0); 1954 } 1955 if (nfsvers == NFS_VER3) 1956 nd->nd_flag = ND_NFSV3; 1957 nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++); 1958 if (nd->nd_procnum == NFSPROC_NULL) 1959 return (0); 1960 if (nd->nd_procnum > NFSPROC_COMMIT || 1961 (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) { 1962 nd->nd_repstat = EPROCUNAVAIL; 1963 nd->nd_procnum = NFSPROC_NOOP; 1964 return (0); 1965 } 1966 if ((nd->nd_flag & ND_NFSV3) == 0) 1967 nd->nd_procnum = nfsv3_procid[nd->nd_procnum]; 1968 auth_type = *tl++; 1969 len = fxdr_unsigned(int, *tl++); 1970 if (len < 0 || len > RPCAUTH_MAXSIZ) { 1971 m_freem(mrep); 1972 return (EBADRPC); 1973 } 1974 1975 nd->nd_flag &= ~ND_KERBAUTH; 1976 /* 1977 * Handle auth_unix or auth_kerb. 1978 */ 1979 if (auth_type == rpc_auth_unix) { 1980 uid_t uid; 1981 gid_t gid, *grbuf; 1982 1983 nd->nd_cr = kauth_cred_alloc(); 1984 len = fxdr_unsigned(int, *++tl); 1985 if (len < 0 || len > NFS_MAXNAMLEN) { 1986 m_freem(mrep); 1987 error = EBADRPC; 1988 goto errout; 1989 } 1990 nfsm_adv(nfsm_rndup(len)); 1991 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 1992 1993 uid = fxdr_unsigned(uid_t, *tl++); 1994 gid = fxdr_unsigned(gid_t, *tl++); 1995 kauth_cred_setuid(nd->nd_cr, uid); 1996 kauth_cred_seteuid(nd->nd_cr, uid); 1997 kauth_cred_setsvuid(nd->nd_cr, uid); 1998 kauth_cred_setgid(nd->nd_cr, gid); 1999 kauth_cred_setegid(nd->nd_cr, gid); 2000 kauth_cred_setsvgid(nd->nd_cr, gid); 2001 2002 len = fxdr_unsigned(int, *tl); 2003 if (len < 0 || len > RPCAUTH_UNIXGIDS) { 2004 m_freem(mrep); 2005 error = EBADRPC; 2006 goto errout; 2007 } 2008 nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED); 2009 2010 grbuf = malloc(len * sizeof(gid_t), M_TEMP, M_WAITOK); 2011 for (i = 0; i < len; i++) { 2012 if (i < NGROUPS) /* XXX elad */ 2013 grbuf[i] = fxdr_unsigned(gid_t, *tl++); 2014 else 2015 tl++; 2016 } 2017 kauth_cred_setgroups(nd->nd_cr, grbuf, min(len, NGROUPS), -1, 2018 UIO_SYSSPACE); 2019 free(grbuf, M_TEMP); 2020 2021 len = fxdr_unsigned(int, *++tl); 2022 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2023 m_freem(mrep); 2024 error = EBADRPC; 2025 goto errout; 2026 } 2027 if (len > 0) 2028 nfsm_adv(nfsm_rndup(len)); 2029 } else if (auth_type == rpc_auth_kerb) { 2030 switch (fxdr_unsigned(int, *tl++)) { 2031 case RPCAKN_FULLNAME: 2032 ticklen = fxdr_unsigned(int, *tl); 2033 *((u_int32_t *)nfsd->nfsd_authstr) = *tl; 2034 uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED; 2035 nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED; 2036 if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { 2037 m_freem(mrep); 2038 error = EBADRPC; 2039 goto errout; 2040 } 2041 uio.uio_offset = 0; 2042 uio.uio_iov = &iov; 2043 uio.uio_iovcnt = 1; 2044 UIO_SETUP_SYSSPACE(&uio); 2045 iov.iov_base = (void *)&nfsd->nfsd_authstr[4]; 2046 iov.iov_len = RPCAUTH_MAXSIZ - 4; 2047 nfsm_mtouio(&uio, uio.uio_resid); 2048 nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2049 if (*tl++ != rpc_auth_kerb || 2050 fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) { 2051 printf("Bad kerb verifier\n"); 2052 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); 2053 nd->nd_procnum = NFSPROC_NOOP; 2054 return (0); 2055 } 2056 nfsm_dissect(cp, void *, 4 * NFSX_UNSIGNED); 2057 tl = (u_int32_t *)cp; 2058 if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) { 2059 printf("Not fullname kerb verifier\n"); 2060 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); 2061 nd->nd_procnum = NFSPROC_NOOP; 2062 return (0); 2063 } 2064 cp += NFSX_UNSIGNED; 2065 memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED); 2066 nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED; 2067 nd->nd_flag |= ND_KERBFULL; 2068 nfsd->nfsd_flag |= NFSD_NEEDAUTH; 2069 break; 2070 case RPCAKN_NICKNAME: 2071 if (len != 2 * NFSX_UNSIGNED) { 2072 printf("Kerb nickname short\n"); 2073 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED); 2074 nd->nd_procnum = NFSPROC_NOOP; 2075 return (0); 2076 } 2077 nickuid = fxdr_unsigned(uid_t, *tl); 2078 nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); 2079 if (*tl++ != rpc_auth_kerb || 2080 fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) { 2081 printf("Kerb nick verifier bad\n"); 2082 nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF); 2083 nd->nd_procnum = NFSPROC_NOOP; 2084 return (0); 2085 } 2086 nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); 2087 tvin.tv_sec = *tl++; 2088 tvin.tv_usec = *tl; 2089 2090 LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid), 2091 nu_hash) { 2092 if (kauth_cred_geteuid(nuidp->nu_cr) == nickuid && 2093 (!nd->nd_nam2 || 2094 netaddr_match(NU_NETFAM(nuidp), 2095 &nuidp->nu_haddr, nd->nd_nam2))) 2096 break; 2097 } 2098 if (!nuidp) { 2099 nd->nd_repstat = 2100 (NFSERR_AUTHERR|AUTH_REJECTCRED); 2101 nd->nd_procnum = NFSPROC_NOOP; 2102 return (0); 2103 } 2104 2105 /* 2106 * Now, decrypt the timestamp using the session key 2107 * and validate it. 2108 */ 2109 #ifdef NFSKERB 2110 XXX 2111 #endif 2112 2113 tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec); 2114 tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec); 2115 if (nuidp->nu_expire < time_second || 2116 nuidp->nu_timestamp.tv_sec > tvout.tv_sec || 2117 (nuidp->nu_timestamp.tv_sec == tvout.tv_sec && 2118 nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) { 2119 nuidp->nu_expire = 0; 2120 nd->nd_repstat = 2121 (NFSERR_AUTHERR|AUTH_REJECTVERF); 2122 nd->nd_procnum = NFSPROC_NOOP; 2123 return (0); 2124 } 2125 kauth_cred_hold(nuidp->nu_cr); 2126 nd->nd_cr = nuidp->nu_cr; 2127 nd->nd_flag |= ND_KERBNICK; 2128 } 2129 } else { 2130 nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED); 2131 nd->nd_procnum = NFSPROC_NOOP; 2132 return (0); 2133 } 2134 2135 nd->nd_md = md; 2136 nd->nd_dpos = dpos; 2137 KASSERT((nd->nd_cr == NULL && (nfsd->nfsd_flag & NFSD_NEEDAUTH) != 0) 2138 || (nd->nd_cr != NULL && (nfsd->nfsd_flag & NFSD_NEEDAUTH) == 0)); 2139 return (0); 2140 nfsmout: 2141 errout: 2142 KASSERT(error != 0); 2143 if (nd->nd_cr != NULL) { 2144 kauth_cred_free(nd->nd_cr); 2145 nd->nd_cr = NULL; 2146 } 2147 return (error); 2148 } 2149 2150 int 2151 nfs_msg(l, server, msg) 2152 struct lwp *l; 2153 const char *server, *msg; 2154 { 2155 tpr_t tpr; 2156 2157 if (l) 2158 tpr = tprintf_open(l->l_proc); 2159 else 2160 tpr = NULL; 2161 tprintf(tpr, "nfs server %s: %s\n", server, msg); 2162 tprintf_close(tpr); 2163 return (0); 2164 } 2165 2166 #ifdef NFSSERVER 2167 int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *, 2168 struct nfssvc_sock *, struct lwp *, 2169 struct mbuf **)) = { 2170 nfsrv_null, 2171 nfsrv_getattr, 2172 nfsrv_setattr, 2173 nfsrv_lookup, 2174 nfsrv3_access, 2175 nfsrv_readlink, 2176 nfsrv_read, 2177 nfsrv_write, 2178 nfsrv_create, 2179 nfsrv_mkdir, 2180 nfsrv_symlink, 2181 nfsrv_mknod, 2182 nfsrv_remove, 2183 nfsrv_rmdir, 2184 nfsrv_rename, 2185 nfsrv_link, 2186 nfsrv_readdir, 2187 nfsrv_readdirplus, 2188 nfsrv_statfs, 2189 nfsrv_fsinfo, 2190 nfsrv_pathconf, 2191 nfsrv_commit, 2192 nfsrv_noop 2193 }; 2194 2195 /* 2196 * Socket upcall routine for the nfsd sockets. 2197 * The void *arg is a pointer to the "struct nfssvc_sock". 2198 * Essentially do as much as possible non-blocking, else punt and it will 2199 * be called with M_WAIT from an nfsd. 2200 */ 2201 void 2202 nfsrv_rcv(so, arg, waitflag) 2203 struct socket *so; 2204 void *arg; 2205 int waitflag; 2206 { 2207 struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 2208 struct mbuf *m; 2209 struct mbuf *mp, *nam; 2210 struct uio auio; 2211 int flags, error; 2212 int setflags = 0; 2213 2214 error = nfsdsock_lock(slp, (waitflag != M_DONTWAIT)); 2215 if (error) { 2216 setflags |= SLP_NEEDQ; 2217 goto dorecs_unlocked; 2218 } 2219 2220 KASSERT(so == slp->ns_so); 2221 #define NFS_TEST_HEAVY 2222 #ifdef NFS_TEST_HEAVY 2223 /* 2224 * Define this to test for nfsds handling this under heavy load. 2225 * 2226 * XXX it isn't safe to call so_receive from so_upcall context. 2227 */ 2228 if (waitflag == M_DONTWAIT) { 2229 setflags |= SLP_NEEDQ; 2230 goto dorecs; 2231 } 2232 #endif 2233 mutex_enter(&slp->ns_lock); 2234 slp->ns_flag &= ~SLP_NEEDQ; 2235 mutex_exit(&slp->ns_lock); 2236 if (so->so_type == SOCK_STREAM) { 2237 #ifndef NFS_TEST_HEAVY 2238 /* 2239 * If there are already records on the queue, defer soreceive() 2240 * to an nfsd so that there is feedback to the TCP layer that 2241 * the nfs servers are heavily loaded. 2242 */ 2243 if (slp->ns_rec && waitflag == M_DONTWAIT) { 2244 setflags |= SLP_NEEDQ; 2245 goto dorecs; 2246 } 2247 #endif 2248 2249 /* 2250 * Do soreceive(). 2251 */ 2252 auio.uio_resid = 1000000000; 2253 /* not need to setup uio_vmspace */ 2254 flags = MSG_DONTWAIT; 2255 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, &flags); 2256 if (error || mp == NULL) { 2257 if (error == EWOULDBLOCK) 2258 setflags |= SLP_NEEDQ; 2259 else 2260 setflags |= SLP_DISCONN; 2261 goto dorecs; 2262 } 2263 m = mp; 2264 m_claimm(m, &nfs_mowner); 2265 if (slp->ns_rawend) { 2266 slp->ns_rawend->m_next = m; 2267 slp->ns_cc += 1000000000 - auio.uio_resid; 2268 } else { 2269 slp->ns_raw = m; 2270 slp->ns_cc = 1000000000 - auio.uio_resid; 2271 } 2272 while (m->m_next) 2273 m = m->m_next; 2274 slp->ns_rawend = m; 2275 2276 /* 2277 * Now try and parse record(s) out of the raw stream data. 2278 */ 2279 error = nfsrv_getstream(slp, waitflag); 2280 if (error) { 2281 if (error == EPERM) 2282 setflags |= SLP_DISCONN; 2283 else 2284 setflags |= SLP_NEEDQ; 2285 } 2286 } else { 2287 do { 2288 auio.uio_resid = 1000000000; 2289 /* not need to setup uio_vmspace */ 2290 flags = MSG_DONTWAIT; 2291 error = (*so->so_receive)(so, &nam, &auio, &mp, NULL, 2292 &flags); 2293 if (mp) { 2294 if (nam) { 2295 m = nam; 2296 m->m_next = mp; 2297 } else 2298 m = mp; 2299 m_claimm(m, &nfs_mowner); 2300 if (slp->ns_recend) 2301 slp->ns_recend->m_nextpkt = m; 2302 else 2303 slp->ns_rec = m; 2304 slp->ns_recend = m; 2305 m->m_nextpkt = (struct mbuf *)0; 2306 } 2307 if (error) { 2308 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 2309 && error != EWOULDBLOCK) { 2310 setflags |= SLP_DISCONN; 2311 goto dorecs; 2312 } 2313 } 2314 } while (mp); 2315 } 2316 dorecs: 2317 nfsdsock_unlock(slp); 2318 2319 dorecs_unlocked: 2320 /* 2321 * Now try and process the request records, non-blocking. 2322 */ 2323 if (setflags) { 2324 mutex_enter(&slp->ns_lock); 2325 slp->ns_flag |= setflags; 2326 mutex_exit(&slp->ns_lock); 2327 } 2328 if (waitflag == M_DONTWAIT && 2329 (slp->ns_rec || (slp->ns_flag & (SLP_DISCONN | SLP_NEEDQ)) != 0)) { 2330 nfsrv_wakenfsd(slp); 2331 } 2332 } 2333 2334 int 2335 nfsdsock_lock(struct nfssvc_sock *slp, bool waitok) 2336 { 2337 2338 mutex_enter(&slp->ns_lock); 2339 while ((~slp->ns_flag & (SLP_BUSY|SLP_VALID)) == 0) { 2340 if (!waitok) { 2341 mutex_exit(&slp->ns_lock); 2342 return EWOULDBLOCK; 2343 } 2344 cv_wait(&slp->ns_cv, &slp->ns_lock); 2345 } 2346 if ((slp->ns_flag & SLP_VALID) == 0) { 2347 mutex_exit(&slp->ns_lock); 2348 return EINVAL; 2349 } 2350 KASSERT((slp->ns_flag & SLP_BUSY) == 0); 2351 slp->ns_flag |= SLP_BUSY; 2352 mutex_exit(&slp->ns_lock); 2353 2354 return 0; 2355 } 2356 2357 void 2358 nfsdsock_unlock(struct nfssvc_sock *slp) 2359 { 2360 2361 mutex_enter(&slp->ns_lock); 2362 KASSERT((slp->ns_flag & SLP_BUSY) != 0); 2363 KASSERT((slp->ns_flag & SLP_VALID) != 0); 2364 cv_broadcast(&slp->ns_cv); 2365 slp->ns_flag &= ~SLP_BUSY; 2366 mutex_exit(&slp->ns_lock); 2367 } 2368 2369 int 2370 nfsdsock_drain(struct nfssvc_sock *slp) 2371 { 2372 int error = 0; 2373 2374 mutex_enter(&slp->ns_lock); 2375 if ((slp->ns_flag & SLP_VALID) == 0) { 2376 error = EINVAL; 2377 goto done; 2378 } 2379 slp->ns_flag &= ~SLP_VALID; 2380 while ((slp->ns_flag & SLP_BUSY) != 0) { 2381 cv_wait(&slp->ns_cv, &slp->ns_lock); 2382 } 2383 done: 2384 mutex_exit(&slp->ns_lock); 2385 2386 return error; 2387 } 2388 2389 /* 2390 * Try and extract an RPC request from the mbuf data list received on a 2391 * stream socket. The "waitflag" argument indicates whether or not it 2392 * can sleep. 2393 */ 2394 int 2395 nfsrv_getstream(slp, waitflag) 2396 struct nfssvc_sock *slp; 2397 int waitflag; 2398 { 2399 struct mbuf *m, **mpp; 2400 struct mbuf *recm; 2401 u_int32_t recmark; 2402 int error = 0; 2403 2404 KASSERT((slp->ns_flag & SLP_BUSY) != 0); 2405 for (;;) { 2406 if (slp->ns_reclen == 0) { 2407 if (slp->ns_cc < NFSX_UNSIGNED) { 2408 break; 2409 } 2410 m = slp->ns_raw; 2411 m_copydata(m, 0, NFSX_UNSIGNED, (void *)&recmark); 2412 m_adj(m, NFSX_UNSIGNED); 2413 slp->ns_cc -= NFSX_UNSIGNED; 2414 recmark = ntohl(recmark); 2415 slp->ns_reclen = recmark & ~0x80000000; 2416 if (recmark & 0x80000000) 2417 slp->ns_flag |= SLP_LASTFRAG; 2418 else 2419 slp->ns_flag &= ~SLP_LASTFRAG; 2420 if (slp->ns_reclen > NFS_MAXPACKET) { 2421 error = EPERM; 2422 break; 2423 } 2424 } 2425 2426 /* 2427 * Now get the record part. 2428 * 2429 * Note that slp->ns_reclen may be 0. Linux sometimes 2430 * generates 0-length records. 2431 */ 2432 if (slp->ns_cc == slp->ns_reclen) { 2433 recm = slp->ns_raw; 2434 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 2435 slp->ns_cc = slp->ns_reclen = 0; 2436 } else if (slp->ns_cc > slp->ns_reclen) { 2437 recm = slp->ns_raw; 2438 m = m_split(recm, slp->ns_reclen, waitflag); 2439 if (m == NULL) { 2440 error = EWOULDBLOCK; 2441 break; 2442 } 2443 m_claimm(recm, &nfs_mowner); 2444 slp->ns_raw = m; 2445 if (m->m_next == NULL) 2446 slp->ns_rawend = m; 2447 slp->ns_cc -= slp->ns_reclen; 2448 slp->ns_reclen = 0; 2449 } else { 2450 break; 2451 } 2452 2453 /* 2454 * Accumulate the fragments into a record. 2455 */ 2456 mpp = &slp->ns_frag; 2457 while (*mpp) 2458 mpp = &((*mpp)->m_next); 2459 *mpp = recm; 2460 if (slp->ns_flag & SLP_LASTFRAG) { 2461 if (slp->ns_recend) 2462 slp->ns_recend->m_nextpkt = slp->ns_frag; 2463 else 2464 slp->ns_rec = slp->ns_frag; 2465 slp->ns_recend = slp->ns_frag; 2466 slp->ns_frag = (struct mbuf *)0; 2467 } 2468 } 2469 2470 return error; 2471 } 2472 2473 /* 2474 * Parse an RPC header. 2475 */ 2476 int 2477 nfsrv_dorec(slp, nfsd, ndp) 2478 struct nfssvc_sock *slp; 2479 struct nfsd *nfsd; 2480 struct nfsrv_descript **ndp; 2481 { 2482 struct mbuf *m, *nam; 2483 struct nfsrv_descript *nd; 2484 int error; 2485 2486 *ndp = NULL; 2487 2488 if (nfsdsock_lock(slp, true)) { 2489 return ENOBUFS; 2490 } 2491 m = slp->ns_rec; 2492 if (m == NULL) { 2493 nfsdsock_unlock(slp); 2494 return ENOBUFS; 2495 } 2496 slp->ns_rec = m->m_nextpkt; 2497 if (slp->ns_rec) 2498 m->m_nextpkt = NULL; 2499 else 2500 slp->ns_recend = NULL; 2501 nfsdsock_unlock(slp); 2502 2503 if (m->m_type == MT_SONAME) { 2504 nam = m; 2505 m = m->m_next; 2506 nam->m_next = NULL; 2507 } else 2508 nam = NULL; 2509 nd = nfsdreq_alloc(); 2510 nd->nd_md = nd->nd_mrep = m; 2511 nd->nd_nam2 = nam; 2512 nd->nd_dpos = mtod(m, void *); 2513 error = nfs_getreq(nd, nfsd, true); 2514 if (error) { 2515 m_freem(nam); 2516 nfsdreq_free(nd); 2517 return (error); 2518 } 2519 *ndp = nd; 2520 nfsd->nfsd_nd = nd; 2521 return (0); 2522 } 2523 2524 /* 2525 * Search for a sleeping nfsd and wake it up. 2526 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 2527 * running nfsds will go look for the work in the nfssvc_sock list. 2528 */ 2529 void 2530 nfsrv_wakenfsd(slp) 2531 struct nfssvc_sock *slp; 2532 { 2533 struct nfsd *nd; 2534 2535 if ((slp->ns_flag & SLP_VALID) == 0) 2536 return; 2537 mutex_enter(&nfsd_lock); 2538 if (slp->ns_flag & SLP_DOREC) { 2539 mutex_exit(&nfsd_lock); 2540 return; 2541 } 2542 nd = SLIST_FIRST(&nfsd_idle_head); 2543 if (nd) { 2544 SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle); 2545 if (nd->nfsd_slp) 2546 panic("nfsd wakeup"); 2547 slp->ns_sref++; 2548 KASSERT(slp->ns_sref > 0); 2549 nd->nfsd_slp = slp; 2550 cv_signal(&nd->nfsd_cv); 2551 } else { 2552 slp->ns_flag |= SLP_DOREC; 2553 nfsd_head_flag |= NFSD_CHECKSLP; 2554 TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending); 2555 } 2556 mutex_exit(&nfsd_lock); 2557 } 2558 2559 int 2560 nfsdsock_sendreply(struct nfssvc_sock *slp, struct nfsrv_descript *nd) 2561 { 2562 int error; 2563 2564 if (nd->nd_mrep != NULL) { 2565 m_freem(nd->nd_mrep); 2566 nd->nd_mrep = NULL; 2567 } 2568 2569 mutex_enter(&slp->ns_lock); 2570 if ((slp->ns_flag & SLP_SENDING) != 0) { 2571 SIMPLEQ_INSERT_TAIL(&slp->ns_sendq, nd, nd_sendq); 2572 mutex_exit(&slp->ns_lock); 2573 return 0; 2574 } 2575 KASSERT(SIMPLEQ_EMPTY(&slp->ns_sendq)); 2576 slp->ns_flag |= SLP_SENDING; 2577 mutex_exit(&slp->ns_lock); 2578 2579 again: 2580 error = nfs_send(slp->ns_so, nd->nd_nam2, nd->nd_mreq, NULL, curlwp); 2581 if (nd->nd_nam2) { 2582 m_free(nd->nd_nam2); 2583 } 2584 nfsdreq_free(nd); 2585 2586 mutex_enter(&slp->ns_lock); 2587 KASSERT((slp->ns_flag & SLP_SENDING) != 0); 2588 nd = SIMPLEQ_FIRST(&slp->ns_sendq); 2589 if (nd != NULL) { 2590 SIMPLEQ_REMOVE_HEAD(&slp->ns_sendq, nd_sendq); 2591 mutex_exit(&slp->ns_lock); 2592 goto again; 2593 } 2594 slp->ns_flag &= ~SLP_SENDING; 2595 mutex_exit(&slp->ns_lock); 2596 2597 return error; 2598 } 2599 #endif /* NFSSERVER */ 2600 2601 #if defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) 2602 static struct pool nfs_srvdesc_pool; 2603 2604 void 2605 nfsdreq_init(void) 2606 { 2607 2608 pool_init(&nfs_srvdesc_pool, sizeof(struct nfsrv_descript), 2609 0, 0, 0, "nfsrvdescpl", &pool_allocator_nointr, IPL_NONE); 2610 } 2611 2612 struct nfsrv_descript * 2613 nfsdreq_alloc(void) 2614 { 2615 struct nfsrv_descript *nd; 2616 2617 nd = pool_get(&nfs_srvdesc_pool, PR_WAITOK); 2618 nd->nd_cr = NULL; 2619 return nd; 2620 } 2621 2622 void 2623 nfsdreq_free(struct nfsrv_descript *nd) 2624 { 2625 kauth_cred_t cr; 2626 2627 cr = nd->nd_cr; 2628 if (cr != NULL) { 2629 kauth_cred_free(cr); 2630 } 2631 pool_put(&nfs_srvdesc_pool, nd); 2632 } 2633 #endif /* defined(NFSSERVER) || (defined(NFS) && !defined(NFS_V2_ONLY)) */ 2634