/*	$NetBSD: nfs_socket.c,v 1.108 2004/06/24 04:15:51 jonathan Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_socket.c,v 1.108 2004/06/24 04:15:51 jonathan Exp $");

#include "fs_nfs.h"
#include "opt_nfs.h"
#include "opt_nfsserver.h"
#include "opt_mbuftrace.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/namei.h>
#include <sys/signal.h>
#include <sys/signalvar.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>

MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
#ifdef MBUFTRACE
struct mowner nfs_mowner = { "nfs" };
#endif

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	((t) < 3 ? \
	(((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
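
/*
 * Worked example of the scaling in NFS_RTO (editorial note, derived from
 * the update code in nfs_reply() below): nm_srtt is kept at roughly 8x
 * the smoothed rtt mean A (gain 1/8) and nm_sdrtt at roughly 4x the mean
 * deviation D (gain 1/4).  So for timers 1 and 2 the macro computes about
 * ((8A >> 2) + 4D) / 2 = A + 2D, and for timers 3 and 4 it computes
 * (8A >> 3) + 4D = A + 4D, matching the table above.
 */
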
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_int32_t nfs_prog, nqnfs_prog;
extern time_t nqnfsstarttime;
extern const int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static const int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static const int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int nfsrtton = 0;
struct nfsrtt nfsrtt;
struct nfsreqhead nfs_reqq;

struct callout nfs_timer_ch = CALLOUT_INITIALIZER_SETFUNC(nfs_timer, NULL);

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 */
int
nfs_connect(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	struct proc *p;
{
	struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
	struct mbuf *m;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so,
	    nmp->nm_sotype, nmp->nm_soproto, p);
	if (error)
		goto bad;
	so = nmp->nm_so;
#ifdef MBUFTRACE
	so->so_mowner = &nfs_mowner;
	so->so_rcv.sb_mowner = &nfs_mowner;
	so->so_snd.sb_mowner = &nfs_mowner;
#endif
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		m = m_get(M_WAIT, MT_SOOPTS);
		MCLAIM(m, so->so_mowner);
		*mtod(m, int32_t *) = IP_PORTRANGE_LOW;
		m->m_len = sizeof(int32_t);
		if ((error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, m)))
			goto bad;
		m = m_get(M_WAIT, MT_SONAME);
		MCLAIM(m, so->so_mowner);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		sin->sin_port = 0;
		error = sobind(so, m, &proc0);
		m_freem(m);
		if (error)
			goto bad;
	}
#ifdef INET6
	if (saddr->sa_family == AF_INET6 && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		m = m_get(M_WAIT, MT_SOOPTS);
		MCLAIM(m, so->so_mowner);
		*mtod(m, int32_t *) = IPV6_PORTRANGE_LOW;
		m->m_len = sizeof(int32_t);
		if ((error = sosetopt(so, IPPROTO_IPV6, IPV6_PORTRANGE, m)))
			goto bad;
		m = m_get(M_WAIT, MT_SONAME);
		MCLAIM(m, so->so_mowner);
		sin6 = mtod(m, struct sockaddr_in6 *);
		sin6->sin6_len = m->m_len = sizeof (struct sockaddr_in6);
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = in6addr_any;
		sin6->sin6_port = 0;
		error = sobind(so, m, &proc0);
		m_freem(m);
		if (error)
			goto bad;
	}
#endif

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam, p);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splsoftnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
			    "nfscn1", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		/*
		 * enable receive timeout to detect server crash and reconnect.
		 * otherwise, we can be stuck in soreceive forever.
		 */
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = 0;
	}
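
	/*
	 * Sizing note (editorial): the reservations below leave room for
	 * two full requests/replies in each direction; the STREAM case
	 * adds sizeof (u_int32_t) per message for the RPC record mark
	 * that is prepended in nfs_request().
	 */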
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			m = m_get(M_WAIT, MT_SOOPTS);
			MCLAIM(m, so->so_mowner);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			m = m_get(M_WAIT, MT_SOOPTS);
			MCLAIM(m, so->so_mowner);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
	    NFS_TIMEO << 3;
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
	    nmp->nm_sdrtt[3] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	/* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
int
nfs_reconnect(rep, p)
	struct nfsreq *rep;
	struct proc *p;
{
	struct nfsreq *rp;
	struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep, p)) != 0) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscn2", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
		if (rp->r_nmp == nmp) {
			if ((rp->r_flags & R_MUSTRESEND) == 0)
				rp->r_flags |= R_MUSTRESEND | R_REXMITTED;
			rp->r_rexmit = 0;
		}
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	struct nfsmount *nmp;
{
	struct socket *so;
	int drain = 0;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		drain = (nmp->nm_iflag & NFSMNT_DISMNT) != 0;
		if (drain) {
			/*
			 * soshutdown() above should wake up the current
			 * listener.
			 * Now wake up those waiting for the receive lock, and
			 * wait for them to go away unhappy, to prevent *nmp
			 * from evaporating while they're sleeping.
			 */
			while (nmp->nm_waiters > 0) {
				wakeup(&nmp->nm_iflag);
				(void) tsleep(&nmp->nm_waiters, PVFS,
				    "nfsdis", 0);
			}
		}
		soclose(so);
	}
#ifdef DIAGNOSTIC
	if (drain && (nmp->nm_waiters > 0))
		panic("nfs_disconnect: waiters left after drain?");
#endif
}
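
/*
 * Editorial note: nfs_safedisconnect() below is the serialized variant
 * of nfs_disconnect().  It takes the receive lock with a zeroed dummy
 * request first, so that no reader can be blocked inside soreceive()
 * on the socket while it is being torn down.
 */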
void
nfs_safedisconnect(nmp)
	struct nfsmount *nmp;
{
	struct nfsreq dummyreq;

	memset(&dummyreq, 0, sizeof(dummyreq));
	dummyreq.r_nmp = nmp;
	nfs_rcvlock(&dummyreq);	/* XXX ignored error return */
	nfs_disconnect(nmp);
	nfs_rcvunlock(nmp);
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (? ? ?)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (? ? ?)
 */
int
nfs_send(so, nam, top, rep, p)
	struct socket *so;
	struct mbuf *nam;
	struct mbuf *top;
	struct nfsreq *rep;
	struct proc *p;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	/* XXX nfs_doio()/nfs_request() calls with rep->r_procp == NULL */
	if (p == NULL && rep->r_procp == NULL) {
#ifdef DIAGNOSTIC
		static int warned = 0;
		if (!warned) {
			printf("nfs_send: proc botch: "
			    "rep %p arg %p curproc %p\n",
			    rep->r_procp, p, curproc);
			warned = 1;
		}
#endif /* DIAGNOSTIC */
		p = curproc;
	}

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = (*so->so_send)(so, sendnam, (struct uio *)0, top,
	    (struct mbuf *)0, flags, p);
	if (error) {
		if (rep) {
			if (error == ENOBUFS && so->so_type == SOCK_DGRAM) {
				/*
				 * We're too fast for the network/driver,
				 * and UDP isn't flowcontrolled.
				 * We need to resend. This is not fatal,
				 * just try again.
				 *
				 * Could be smarter here by doing some sort
				 * of a backoff, but this is rare.
				 */
				rep->r_flags |= R_MUSTRESEND;
			} else {
				if (error != EPIPE)
					log(LOG_INFO,
					    "nfs send error %d for %s\n",
					    error,
					    rep->r_nmp->nm_mountp->
					    mnt_stat.f_mntfromname);
				/*
				 * Deal with errors for the client side.
				 */
				if (rep->r_flags & R_SOFTTERM)
					error = EINTR;
				else
					rep->r_flags |= R_MUSTRESEND;
			}
		} else {
			/*
			 * See above. This error can happen under normal
			 * circumstances and the log is too noisy.
			 * The error will still show up in nfsstat.
			 */
			if (error != ENOBUFS || so->so_type != SOCK_DGRAM)
				log(LOG_INFO, "nfsd send error %d\n", error);
		}

		/*
		 * Handle any recoverable (soft) socket errors here. (? ? ?)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}

#ifdef NFS
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 * small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
int
nfs_receive(rep, aname, mp, p)
	struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
	struct proc *p;
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_iflag);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep, p);
			if (error) {
				nfs_sndunlock(&rep->r_nmp->nm_iflag);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			rep->r_rtt = 0;
			rep->r_flags &= ~R_TIMING;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep, p);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep, p)) != 0) {
					nfs_sndunlock(&rep->r_nmp->nm_iflag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_iflag);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, (struct mbuf **)0,
				    &auio, (struct mbuf **)0,
				    (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
					/*
					 * if it seems that the server died
					 * after it received our request, set
					 * EPIPE so that we'll reconnect and
					 * retransmit requests.
					 */
					if (rep->r_rexmit >= rep->r_nmp->nm_retry) {
						nfsstats.rpctimeouts++;
						error = EPIPE;
					}
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				/*
				 * Don't log a 0 byte receive; it means
				 * that the socket has been closed, and
				 * can happen during normal operation
				 * (forcible unmount or Solaris server).
				 */
				if (auio.uio_resid != sizeof (u_int32_t))
					log(LOG_INFO,
					    "short receive (%lu/%lu) from nfs server %s\n",
					    (u_long)sizeof(u_int32_t) - auio.uio_resid,
					    (u_long)sizeof(u_int32_t),
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
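			/*
			 * Editorial note: the record mark just read is the
			 * standard Sun RPC framing word (RFC 1831): the
			 * high-order bit flags the last fragment of a record
			 * and the low 31 bits give the fragment length, so
			 * masking with ~0x80000000 leaves the byte count.
			 */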
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = (*so->so_receive)(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				if (len != auio.uio_resid)
					log(LOG_INFO,
					    "short receive (%lu/%d) from nfs server %s\n",
					    (u_long)len - auio.uio_resid, len,
					    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000;	/* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = (*so->so_receive)(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_iflag, rep);
			if (!error)
				error = nfs_reconnect(rep, p);
			if (!error)
				goto tryagain;
			else
				nfs_sndunlock(&rep->r_nmp->nm_iflag);
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = (*so->so_receive)(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
		if (!error && *mp == NULL)
			error = EPIPE;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
int
nfs_reply(myrep, procp)
	struct nfsreq *myrep;
	struct proc *procp;
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(myrep);
		if (error == EALREADY)
			return (0);
		if (error)
			return (error);
		/*
		 * Get the next Rpc reply off the socket
		 */
		nmp->nm_waiters++;
		error = nfs_receive(myrep, &nam, &mrep, procp);
		nfs_rcvunlock(nmp);
		if (error) {
			if (nmp->nm_iflag & NFSMNT_DISMNT) {
				/*
				 * Oops, we're going away now..
				 */
				nmp->nm_waiters--;
				wakeup(&nmp->nm_waiters);
				return error;
			}
			nmp->nm_waiters--;
			/*
			 * Ignore routing errors on connectionless protocols? ?
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
#ifdef DEBUG
				printf("nfs_reply: ignoring error %d\n", error);
#endif
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}
		nmp->nm_waiters--;
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
#ifndef NFS_V2_ONLY
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else
#endif
			{
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
nfsmout:
			if (myrep->r_flags & R_GETONEREP)
				return (0);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsidx;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
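				/*
				 * Editorial note: the update above is the
				 * standard VJ estimator.  The gain-1/8 step
				 * keeps NFS_SRTT at ~8x the rtt mean and the
				 * gain-1/4 step keeps NFS_SDRTT at ~4x the
				 * mean deviation, which is exactly the
				 * scaling the NFS_RTO macro undoes.
				 */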
				nmp->nm_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
		if (myrep->r_flags & R_GETONEREP)
			return (0);
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(np, mrest, procnum, procp, cred, mrp, mdp, dposp, rexmitp)
	struct nfsnode *np;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
	int *rexmitp;
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NFS_TRYLATERDEL, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */
	struct ucred acred;
#ifndef NFS_V2_ONLY
	int nqlflag, cachable;
	u_quad_t frev;
#endif

	if (rexmitp != NULL)
		*rexmitp = 0;

	KASSERT(cred != NULL);
	nmp = VFSTONFS(np->n_vnode->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		memset((caddr_t)key, 0, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
		    &auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
			    &auth_len, verf_str, &verf_len, key);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		}
	} else {
		switch (procnum) {
		case NFSPROC_READ:
		case NFSPROC_WRITE:
		case NFSPROC_COMMIT:
			acred.cr_uid = np->n_vattr->va_uid;
			acred.cr_gid = np->n_vattr->va_gid;
			acred.cr_ngroups = 0;
			acred.cr_ref = 2;	/* Just to be safe.. */
			cred = &acred;
			break;
		}
		auth_type = RPCAUTH_UNIX;
		auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : cred->cr_ngroups) << 2) +
		    5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	    auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
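	/*
	 * Editorial example: for a 100-byte RPC message the word prepended
	 * above is htonl(0x80000064) -- the high bit marks this as the
	 * last (and only) fragment, and the low 31 bits carry the length.
	 */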
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
	    nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_iflag, rep);
		if (!error) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep, procp);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_iflag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep, procp);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	if (rexmitp != NULL) {
		int rexmit;

		if (nmp->nm_sotype != SOCK_DGRAM)
			rexmit = (rep->r_flags & R_REXMITTED) != 0;
		else
			rexmit = rep->r_rexmit;
		*rexmitp = rexmit;
	}

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
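	/*
	 * Editorial note: of the three words dissected above, the first
	 * was the reply status (accepted/denied); the remaining two, read
	 * here, are the verifier flavor and its length.  The verifier body
	 * is saved (Kerberos) or skipped before the accept status word.
	 */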
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
			    error == NFSERR_TRYLATER) {
				m_freem(mrep);
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
					    PSOCK, "nqnfstry", 0);
				trylater_delay *= NFS_TRYLATERDELMUL;
				if (trylater_delay > NFS_TRYLATERDELMAX)
					trylater_delay = NFS_TRYLATERDELMAX;
				/*
				 * RFC1813:
				 * The client should wait and then try
				 * the request with a new RPC transaction ID.
				 */
				nfs_renewxid(rep);
				goto tryagain;
			}

			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(NFSTOV(np));
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}

#ifndef NFS_V2_ONLY
		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
			if (*tl) {
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				if (reqtime > time.tv_sec) {
					frev = fxdr_hyper(tl);
					nqnfs_clientlease(nmp, np, nqlflag,
					    cachable, reqtime, frev);
				}
			}
		}
#endif
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	return (error);
}
#endif /* NFS */

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	u_int32_t *tl;
	struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb;

	mreq = m_gethdr(M_WAIT, MT_DATA);
	MCLAIM(mreq, &nfs_mowner);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= max_datalen) {
		m_clget(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_int32_t *);
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;

			LIST_FOREACH(nuidp, NUIDHASH(slp, nd->nd_cr.cr_uid),
			    nu_hash) {
				if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
				    (!nd->nd_nam2 || netaddr_match(
				    NU_NETFAM(nuidp), &nuidp->nu_haddr,
				    nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec
				    - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				/*
				 * Encrypt the timestamp in ecb mode using the
				 * session key.
				 */
#ifdef NFSKERB
				XXX
#endif

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & ND_NQNFS) {
				*tl++ = txdr_unsigned(3);
				*tl = txdr_unsigned(3);
			} else {
				*tl++ = txdr_unsigned(2);
				*tl = txdr_unsigned(3);
			}
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
		if (nd->nd_flag & ND_LEASE) {
			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(*frev, tl);
		} else {
			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	if (mrq != NULL)
		*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 */
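/*
 * Editorial sketch of the retransmit schedule driven from here: each time
 * a request's timer expires, nm_timeouts is bumped, so successive waits
 * are the base RTO scaled by nfs_backoff[] = 2, 4, 8, ..., 256; a matched
 * reply in nfs_reply() resets nm_timeouts to 0.  On each actual resend of
 * an already-sent request the congestion window is also halved (the
 * multiplicative-decrease half of the AIMD scheme described above).
 */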
void
nfs_timer(arg)
	void *arg;	/* never used */
{
	struct nfsreq *rep;
	struct mbuf *m;
	struct socket *so;
	struct nfsmount *nmp;
	int timeo;
	int s, error;
#ifdef NFSSERVER
	struct nfssvc_sock *slp;
	static long lasttime = 0;
	u_quad_t cur_usec;
#endif

	s = splsoftnet();
	TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
		nmp = rep->r_nmp;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts <
			    (sizeof(nfs_backoff) / sizeof(nfs_backoff[0])))
				nmp->nm_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		    rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))) {
			if (so->so_state & SS_ISCONNECTED)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND,
				    m, (struct mbuf *)0, (struct mbuf *)0,
				    (struct proc *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND,
				    m, nmp->nm_nam, (struct mbuf *)0,
				    (struct proc *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
#ifdef DEBUG
					printf("nfs_timer: ignoring error %d\n",
					    error);
#endif
					so->so_error = 0;
				}
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

#ifdef NFSSERVER
	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}

	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
	TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
		if (LIST_FIRST(&slp->ns_tq) &&
		    LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
			nfsrv_wakenfsd(slp);
	}
#endif /* NFSSERVER */
	splx(s);
	callout_schedule(&nfs_timer_ch, nfs_ticks);
}

/*ARGSUSED*/
void
nfs_exit(p, v)
	struct proc *p;
	void *v;
{
	struct nfsreq *rp;
	int s = splsoftnet();

	TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
		if (rp->r_procp == p)
			TAILQ_REMOVE(&nfs_reqq, rp, r_chain);
	}
	splx(s);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	struct proc *p;
{
	sigset_t ss;

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p) {
		sigpending1(p, &ss);
#if 0
		sigminusset(&p->p_sigctx.ps_sigignore, &ss);
#endif
		if (sigismember(&ss, SIGINT) || sigismember(&ss, SIGTERM) ||
		    sigismember(&ss, SIGKILL) || sigismember(&ss, SIGHUP) ||
		    sigismember(&ss, SIGQUIT))
			return (EINTR);
	}
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(flagp, rep)
	int *flagp;
	struct nfsreq *rep;
{
	struct proc *p;
	int slpflag = 0, slptimeo = 0;

	if (rep) {
		p = rep->r_procp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			slpflag = PCATCH;
	} else
		p = (struct proc *)0;
	while (*flagp & NFSMNT_SNDLOCK) {
		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
			return (EINTR);
		*flagp |= NFSMNT_WANTSND;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
		    slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_SNDLOCK;
	return (0);
}
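
/*
 * Editorial note on the sleep above: on interruptible (NFSMNT_INT)
 * mounts only the first wait uses PCATCH; after one wakeup the sleep
 * switches to an uninterruptible 2-second timeout, and pending signals
 * are then noticed via the nfs_sigintr() check at the top of the loop.
 */
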
/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(flagp)
	int *flagp;
{

	if ((*flagp & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*flagp &= ~NFSMNT_SNDLOCK;
	if (*flagp & NFSMNT_WANTSND) {
		*flagp &= ~NFSMNT_WANTSND;
		wakeup((caddr_t)flagp);
	}
}

int
nfs_rcvlock(rep)
	struct nfsreq *rep;
{
	struct nfsmount *nmp = rep->r_nmp;
	int *flagp = &nmp->nm_iflag;
	int slpflag, slptimeo = 0;
	int error = 0;

	if (*flagp & NFSMNT_DISMNT)
		return EIO;

	if (*flagp & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	simple_lock(&nmp->nm_slock);
	while (*flagp & NFSMNT_RCVLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) {
			error = EINTR;
			goto quit;
		}
		*flagp |= NFSMNT_WANTRCV;
		nmp->nm_waiters++;
		(void) ltsleep(flagp, slpflag | (PZERO - 1), "nfsrcvlk",
		    slptimeo, &nmp->nm_slock);
		nmp->nm_waiters--;
		if (*flagp & NFSMNT_DISMNT) {
			wakeup(&nmp->nm_waiters);
			error = EIO;
			goto quit;
		}
		/*
		 * If our reply was received while we were sleeping,
		 * then just return without taking the lock to avoid a
		 * situation where a single iod could 'capture' the
		 * receive lock.
		 */
		if (rep->r_mrep != NULL) {
			error = EALREADY;
			goto quit;
		}
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_RCVLOCK;
quit:
	simple_unlock(&nmp->nm_slock);
	return error;
}

/*
 * Unlock the socket receive side for others.
 */
void
nfs_rcvunlock(nmp)
	struct nfsmount *nmp;
{
	int *flagp = &nmp->nm_iflag;

	simple_lock(&nmp->nm_slock);
	if ((*flagp & NFSMNT_RCVLOCK) == 0)
		panic("nfs rcvunlock");
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
	simple_unlock(&nmp->nm_slock);
}
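
/*
 * Editorial note: the call header nfs_getreq() dissects below follows
 * RFC 1831: xid, message direction (CALL), RPC version, program,
 * version, procedure, then the credential flavor and length, with the
 * verifier flavor and length following the credential body.
 */
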
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfs_getreq(nd, nfsd, has_header)
	struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	int len, i;
	u_int32_t *tl;
	int32_t t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_int32_t nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, nqnfs = 0, ticklen;
	struct mbuf *mrep, *md;
	struct nfsuid *nuidp;
	struct timeval tvin, tvout;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog)
			nqnfs++;
		else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
	    (nfsvers != NQNFS_VER3 && nqnfs)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nqnfs)
		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
	    (!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
	    (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		memset((caddr_t)&nd->nd_cr, 0, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
		for (i = 0; i < len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
		if (nd->nd_cr.cr_ngroups > 1)
			nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			*((u_int32_t *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR | AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_int32_t *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR | AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			memcpy(nfsd->nfsd_verfstr, cp, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR | AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR | AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			LIST_FOREACH(nuidp, NUIDHASH(nfsd->nfsd_slp, nickuid),
			    nu_hash) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				    netaddr_match(NU_NETFAM(nuidp),
				    &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
				    (NFSERR_AUTHERR | AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#ifdef NFSKERB
			XXX
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			    nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR | AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		}
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		nd->nd_flag |= fxdr_unsigned(int, *tl);
		if (nd->nd_flag & ND_LEASE) {
			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(u_int32_t, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else
		nd->nd_duration = NQ_MINLEASE;
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

int
nfs_msg(p, server, msg)
	struct proc *p;
	char *server, *msg;
{
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}

#ifdef NFSSERVER
int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
    struct nfssvc_sock *, struct proc *, struct mbuf **)) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
	nfsrv_noop,
	nfsrv_noop
};

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ;
		goto dorecs;
	}
#endif
	/* XXX: was NULL, soreceive() requires non-NULL uio->uio_procp */
	auio.uio_procp = curproc;	/* XXX curproc */
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = (*so->so_receive)(so, &nam, &auio, &mp,
		    (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = (*so->so_receive)(so, &nam, &auio, &mp,
			    (struct mbuf **)0, &flags);
			if (mp) {
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(slp, waitflag)
	struct nfssvc_sock *slp;
	int waitflag;
{
	struct mbuf *m, **mpp;
	struct mbuf *recm;
	u_int32_t recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			m_copydata(m, 0, NFSX_UNSIGNED, (caddr_t)&recmark);
			m_adj(m, NFSX_UNSIGNED);
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 *
		 * Note that slp->ns_reclen may be 0. Linux sometimes
		 * generates 0-length records.
		 */
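		/*
		 * Editorial note: three cases follow -- the buffered byte
		 * count ns_cc exactly matches the record length (take the
		 * whole raw chain), exceeds it (m_split() the chain at the
		 * record boundary), or falls short (return and wait for
		 * more stream data to arrive).
		 */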
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			recm = slp->ns_raw;
			m = m_split(recm, slp->ns_reclen, waitflag);
			if (m == NULL) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EWOULDBLOCK);
			}
			m_claimm(recm, &nfs_mowner);
			slp->ns_raw = m;
			if (m->m_next == NULL)
				slp->ns_rawend = m;
			slp->ns_cc -= slp->ns_reclen;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}

		/*
		 * Accumulate the fragments into a record.
		 */
		mpp = &slp->ns_frag;
		while (*mpp)
			mpp = &((*mpp)->m_next);
		*mpp = recm;
		if (slp->ns_flag & SLP_LASTFRAG) {
			if (slp->ns_recend)
				slp->ns_recend->m_nextpkt = slp->ns_frag;
			else
				slp->ns_rec = slp->ns_frag;
			slp->ns_recend = slp->ns_frag;
			slp->ns_frag = (struct mbuf *)0;
		}
	}
}

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	struct mbuf *m, *nam;
	struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	slp->ns_rec = m->m_nextpkt;
	if (slp->ns_rec)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	if (m->m_type == MT_SONAME) {
		nam = m;
		m = m->m_next;
		nam->m_next = NULL;
	} else
		nam = NULL;
	nd = pool_get(&nfs_srvdesc_pool, PR_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		m_freem(nam);
		pool_put(&nfs_srvdesc_pool, nd);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
 */
void
nfsrv_wakenfsd(slp)
	struct nfssvc_sock *slp;
{
	struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	simple_lock(&nfsd_slock);
	if (slp->ns_flag & SLP_DOREC) {
		simple_unlock(&nfsd_slock);
		return;
	}
	nd = SLIST_FIRST(&nfsd_idle_head);
	if (nd) {
		SLIST_REMOVE_HEAD(&nfsd_idle_head, nfsd_idle);
		simple_unlock(&nfsd_slock);

		KASSERT(nd->nfsd_flag & NFSD_WAITING);
		nd->nfsd_flag &= ~NFSD_WAITING;
		if (nd->nfsd_slp)
			panic("nfsd wakeup");
		slp->ns_sref++;
		nd->nfsd_slp = slp;
		wakeup(nd);
		return;
	}
	slp->ns_flag |= SLP_DOREC;
	nfsd_head_flag |= NFSD_CHECKSLP;
	TAILQ_INSERT_TAIL(&nfssvc_sockpending, slp, ns_pending);
	simple_unlock(&nfsd_slock);
}
#endif /* NFSSERVER */