/*	$OpenBSD: nfs_socket.c,v 1.35 2003/12/08 09:33:36 mickey Exp $	*/
/*	$NetBSD: nfs_socket.c,v 1.27 1996/04/15 20:20:00 thorpej Exp $	*/

/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 3/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
#include <sys/namei.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nfs_var.h>

#define	TRUE	1
#define	FALSE	0

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
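/*
 * E.g. with a smoothed rtt A of 2 ticks and a mean deviation D of 1 tick,
 * a getattr or lookup gets an rto of A + 2D = 4 ticks while a read or
 * write gets A + 4D = 6 ticks.  The macro below extracts A and D from
 * the scaled nm_srtt (kept times 8) and nm_sdrtt (kept times 4) fields
 * that nfs_reply() maintains.
 */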
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_int32_t rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers,
	rpc_auth_unix, rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_int32_t nfs_prog;
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int nfsrtton = 0;
struct nfsrtt nfsrtt;

void nfs_realign(struct mbuf **, int);
unsigned int nfs_realign_test = 0;
unsigned int nfs_realign_count = 0;

struct nfsreqhead nfs_reqq;

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 */
int
nfs_connect(nmp, rep)
	struct nfsmount *nmp;
	struct nfsreq *rep;
{
	struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
	    nmp->nm_soproto);
	if (error)
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port
	 * number.  We always allocate a reserved port, as this prevents
	 * filehandle disclosure through UDP port capture.
	 */
	if (saddr->sa_family == AF_INET) {
		struct mbuf *mopt;
		int *ip;

		MGET(mopt, M_WAIT, MT_SOOPTS);
		mopt->m_len = sizeof(int);
		ip = mtod(mopt, int *);
		*ip = IP_PORTRANGE_LOW;
		error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
		if (error)
			goto bad;

		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		sin->sin_port = htons(0);
		error = sobind(so, m);
		m_freem(m);
		if (error)
			goto bad;

		MGET(mopt, M_WAIT, MT_SOOPTS);
		mopt->m_len = sizeof(int);
		ip = mtod(mopt, int *);
		*ip = IP_PORTRANGE_DEFAULT;
		error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splsoftnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
			    "nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp)) != 0){
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (max(nmp->nm_rsize, nmp->nm_readdirsize) +
		    NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int32_t *) = 1;
			m->m_len = sizeof(int32_t);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR +
		    sizeof (u_int32_t)) * 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

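	/*
	 * Note on units: nm_srtt is kept scaled by 8 and nm_sdrtt by 4 so
	 * that the fixed point smoothing in nfs_reply() loses no precision,
	 * and nm_cwnd/nm_sent are scaled by NFS_CWNDSCALE.  E.g. the initial
	 * cwnd of NFS_MAXCWND / 2 below allows 16 outstanding rpcs before
	 * transmission is deferred to the timer.
	 */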
	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
	    nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
	    nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
int
nfs_reconnect(rep)
	struct nfsreq *rep;
{
	struct nfsreq *rp;
	struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep)) != 0) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	for (rp = TAILQ_FIRST(&nfs_reqq); rp != NULL;
	    rp = TAILQ_NEXT(rp, r_chain)) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	struct nfsmount *nmp;
{
	struct socket *so;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		soclose(so);
	}
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfs_send(so, nam, top, rep)
	struct socket *so;
	struct mbuf *nam;
	struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = sosend(so, sendnam, (struct uio *)0, top,
	    (struct mbuf *)0, flags);
	if (error) {
		if (rep) {
			log(LOG_INFO, "nfs send error %d for server %s\n",
			    error,
			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/*
			 * Deal with errors for the client side.
			 */
			if (rep->r_flags & R_SOFTTERM)
				error = EINTR;
			else
				rep->r_flags |= R_MUSTRESEND;
		} else
			log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (???)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}

#ifdef NFSCLIENT
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
int
nfs_receive(rep, aname, mp)
	struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	struct socket *so;
	struct uio auio;
	struct iovec aio;
	struct mbuf *m;
	struct mbuf *control;
	u_int32_t len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_flag);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(&rep->r_nmp->nm_flag);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep)) != 0) {
					nfs_sndunlock(&rep->r_nmp->nm_flag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_flag);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_int32_t);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_int32_t);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0,
				    &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    sizeof(u_int32_t) - auio.uio_resid,
				    sizeof(u_int32_t),
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;

			len = ntohl(len) & ~0x80000000;
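			/*
			 * The record mark read above is 4 bytes: the high
			 * bit flags the last fragment of a record and the
			 * low 31 bits give the fragment length.  E.g. a
			 * mark of 0x80000070 announces a final fragment of
			 * 0x70 (112) bytes.
			 */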
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
			    error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control messages, but must grab
			 * them and then throw them away so we know what is
			 * going on.
			 */
			auio.uio_resid = len = 100000000; /* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
			    (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
			if (!error) {
				error = nfs_reconnect(rep);
				if (!error)
					goto tryagain;
				nfs_sndunlock(&rep->r_nmp->nm_flag);
			}
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(mp, 5 * NFSX_UNSIGNED);
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
int
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	struct nfsreq *rep;
	struct nfsmount *nmp = myrep->r_nmp;
	int32_t t1;
	struct mbuf *mrep, *nam, *md;
	u_int32_t rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(myrep);
		if (error)
			return (error);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_rcvunlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		error = nfs_receive(myrep, &nam, &mrep);
		nfs_rcvunlock(&nmp->nm_flag);
		if (error) {
			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_int32_t *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			nfsstats.rpcinvalid++;
			m_freem(mrep);
nfsmout:
			if (myrep->r_flags & R_GETONEREP)
				return (0);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		for (rep = TAILQ_FIRST(&nfs_reqq); rep != NULL;
		    rep = TAILQ_NEXT(rep, r_chain)) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
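				/*
				 * E.g. the update above turns a sample of
				 * r_rtt == 3 against a scaled srtt of 16
				 * (2 ticks) into t1 = 4 - 2 = 2, nudging
				 * srtt an eighth of the way toward the
				 * sample; the deviation moves a quarter of
				 * the way, as in TCP's rtt estimator.
				 */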
				nmp->nm_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
		if (myrep->r_flags & R_GETONEREP)
			return (0);
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	struct mbuf *m, *mrep;
	struct nfsreq *rep;
	u_int32_t *tl;
	int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	char nickv[RPCX_NICKVERF];
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = 15, trylater_cnt = 0, failed_auth = 0;
	int verf_len, verf_type;
	u_int32_t xid;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */

	nmp = VFSTONFS(vp->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		bzero((caddr_t)key, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
		    &auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
			    &auth_len, verf_str, &verf_len, key);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		auth_len = (((cred->cr_ngroups > nmp->nm_numgrps) ?
		    nmp->nm_numgrps : cred->cr_ngroups) << 2) +
		    5 * NFSX_UNSIGNED;
	}
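	/*
	 * E.g. a cred carrying 3 groups sizes the AUTH_UNIX body above at
	 * (3 << 2) + 5 * NFSX_UNSIGNED == 32 bytes: five fixed 32-bit words
	 * plus one word per supplementary group, capped at nm_numgrps.
	 */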
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	    auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_int32_t *) = htonl(0x80000000 |
		    (m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftnet();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
	    nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_flag, rep);
		if (!error) {
			error = nfs_send(nmp->nm_so, nmp->nm_nam,
			    m_copym(m, 0, M_COPYALL, M_WAIT),
			    rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_flag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftnet();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	/*
	 * If there was a successful reply and a tprintf msg has been
	 * given, tprintf a response saying the server is alive again.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int32_t, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
			    error == NFSERR_TRYLATER) {
				m_freem(mrep);
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
					    PSOCK, "nqnfstry", 0);
				trylater_delay *= nfs_backoff[trylater_cnt];
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}

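			/*
			 * With nfs_backoff == { 2, 4, 8, ... } the 15
			 * second initial delay above grows to 30, 120,
			 * 960, ... seconds for each successive
			 * NFSERR_TRYLATER reply from the server.
			 */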
			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}

		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	return (error);
}
#endif /* NFSCLIENT */

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfs_rephead(siz, nd, slp, err, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	u_int32_t *tl;
	struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= max_datalen) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_int32_t *);
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;

			for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
			    nuidp != NULL; nuidp = LIST_NEXT(nuidp, nu_hash)) {
				if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
				    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
				    &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(3);
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	*mrq = mreq;
	if (mbp != NULL)
		*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out.
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 */
void
nfs_timer(arg)
	void *arg;
{
	struct timeout *to = (struct timeout *)arg;
	struct nfsreq *rep;
	struct mbuf *m;
	struct socket *so;
	struct nfsmount *nmp;
	int timeo;
	int s, error;
#ifdef NFSSERVER
	struct nfssvc_sock *slp;
	u_quad_t cur_usec;
#endif

	s = splsoftnet();
	for (rep = TAILQ_FIRST(&nfs_reqq); rep != NULL;
	    rep = TAILQ_NEXT(rep, r_chain)) {
		nmp = rep->r_nmp;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
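		/*
		 * E.g. after two consecutive timeouts on this mount the rto
		 * just computed is multiplied by nfs_backoff[1] == 4, so the
		 * retransmit interval backs off exponentially up to
		 * nfs_backoff[7] == 256.
		 */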
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		    rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows, resend it.
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		    ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		    (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    (struct mbuf *)0, (struct mbuf *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    nmp->nm_nam, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

#ifdef NFSSERVER
	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
	for (slp = TAILQ_FIRST(&nfssvc_sockhead); slp != NULL;
	    slp = TAILQ_NEXT(slp, ns_chain)) {
		if (LIST_FIRST(&slp->ns_tq) &&
		    LIST_FIRST(&slp->ns_tq)->nd_time <= cur_usec)
			nfsrv_wakenfsd(slp);
	}
#endif /* NFSSERVER */
	splx(s);
	timeout_add(to, nfs_ticks);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	struct proc *p;
{

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p && p->p_siglist &&
	    (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
	    NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(flagp, rep)
	int *flagp;
	struct nfsreq *rep;
{
	struct proc *p;
	int slpflag = 0, slptimeo = 0;

	if (rep) {
		p = rep->r_procp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			slpflag = PCATCH;
	} else
		p = (struct proc *)0;
	while (*flagp & NFSMNT_SNDLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, p))
			return (EINTR);
		*flagp |= NFSMNT_WANTSND;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
		    slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_SNDLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(flagp)
	int *flagp;
{

	if ((*flagp & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*flagp &= ~NFSMNT_SNDLOCK;
	if (*flagp & NFSMNT_WANTSND) {
		*flagp &= ~NFSMNT_WANTSND;
		wakeup((caddr_t)flagp);
	}
}

int
nfs_rcvlock(rep)
	struct nfsreq *rep;
{
	int *flagp = &rep->r_nmp->nm_flag;
	int slpflag, slptimeo = 0;

	if (*flagp & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	while (*flagp & NFSMNT_RCVLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
			return (EINTR);
		*flagp |= NFSMNT_WANTRCV;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
		    slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_RCVLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_rcvunlock(flagp)
	int *flagp;
{

	if ((*flagp & NFSMNT_RCVLOCK) == 0)
		panic("nfs rcvunlock");
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
}

/*
 * NFS parsing code requires 32-bit alignment
 */
void
nfs_realign(struct mbuf **pm, int hsiz)
{
	struct mbuf *m;
	struct mbuf *n = NULL;
	int off = 0;

	++nfs_realign_test;
	while ((m = *pm) != NULL) {
		if ((m->m_len & 0x3) || (mtod(m, long) & 0x3)) {
			MGET(n, M_WAIT, MT_DATA);
			if (m->m_len >= MINCLSIZE) {
				MCLGET(n, M_WAIT);
			}
			n->m_len = 0;
			break;
		}
		pm = &m->m_next;
	}
	/*
	 * If n is non-NULL, loop on m copying data, then replace the
	 * portion of the chain that had to be realigned.
	 */
	if (n != NULL) {
		++nfs_realign_count;
		while (m) {
			m_copyback(n, off, m->m_len, mtod(m, caddr_t));
			off += m->m_len;
			m = m->m_next;
		}
		m_freem(*pm);
		*pm = n;
	}
}

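/*
 * E.g. an mbuf of 6 bytes, or one whose m_data sits at an odd offset,
 * would make the 32-bit XDR dissect macros fault on strict alignment
 * machines; nfs_realign() above copies such chains into fresh, aligned
 * mbufs before any header is parsed.
 */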

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfs_getreq(nd, nfsd, has_header)
	struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	int len, i;
	u_int32_t *tl;
	int32_t t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_int32_t nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, ticklen;
	struct mbuf *mrep, *md;
	struct nfsuid *nuidp;
	struct timeval tvin, tvout;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_int32_t, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		nd->nd_repstat = EPROGUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	tl++;
	nfsvers = fxdr_unsigned(u_int32_t, *tl++);
	if (nfsvers != NFS_VER2 && nfsvers != NFS_VER3) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
	    (nd->nd_procnum > NFSPROC_COMMIT) ||
	    (!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_int32_t *, (len + 2) * NFSX_UNSIGNED);
		for (i = 0; i < len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len > NGROUPS) ? NGROUPS : len;
		if (nd->nd_cr.cr_ngroups > 1)
			nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			*((u_int32_t *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_int32_t *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
			    fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			for (nuidp = NUIDHASH(nfsd->nfsd_slp,nickuid)->lh_first;
			    nuidp != NULL; nuidp = LIST_NEXT(nuidp, nu_hash)) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				    netaddr_match(NU_NETFAM(nuidp),
				    &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			    nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		}
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

int
nfs_msg(p, server, msg)
	struct proc *p;
	char *server, *msg;
{
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}

#ifdef NFSSERVER
int (*nfsrv3_procs[NFS_NPROCS])(struct nfsrv_descript *,
    struct nfssvc_sock *, struct proc *, struct mbuf **) = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nfsrv_noop,
	nfsrv_noop,
	nfsrv_noop,
	nfsrv_noop
};

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

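		/*
		 * ns_raw/ns_rawend now hold the unparsed byte stream and
		 * ns_cc counts its bytes; nfsrv_getstream() below carves
		 * record-mark delimited rpcs off the front of it.
		 */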
		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
			    (struct mbuf **)0, &flags);
			if (mp) {
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(slp, waitflag)
	struct nfssvc_sock *slp;
	int waitflag;
{
	struct mbuf *m, **mpp;
	char *cp1, *cp2;
	int len;
	struct mbuf *om, *m2, *recm;
	u_int32_t recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			recmark = ntohl(recmark);
			slp->ns_reclen = recmark & ~0x80000000;
			if (recmark & 0x80000000)
				slp->ns_flag |= SLP_LASTFRAG;
			else
				slp->ns_flag &= ~SLP_LASTFRAG;
			if (slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

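		/*
		 * E.g. a mark of 0x800000c8 starts a final fragment of
		 * 0xc8 (200) bytes: ns_reclen is now 200 and SLP_LASTFRAG
		 * is set, so the record is queued for an nfsd as soon as
		 * those 200 bytes have been buffered.
		 */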
1912 */ 1913 recm = NULL; 1914 if (slp->ns_cc == slp->ns_reclen) { 1915 recm = slp->ns_raw; 1916 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 1917 slp->ns_cc = slp->ns_reclen = 0; 1918 } else if (slp->ns_cc > slp->ns_reclen) { 1919 len = 0; 1920 m = slp->ns_raw; 1921 om = (struct mbuf *)0; 1922 while (len < slp->ns_reclen) { 1923 if ((len + m->m_len) > slp->ns_reclen) { 1924 m2 = m_copym(m, 0, slp->ns_reclen - len, 1925 waitflag); 1926 if (m2) { 1927 if (om) { 1928 om->m_next = m2; 1929 recm = slp->ns_raw; 1930 } else 1931 recm = m2; 1932 m->m_data += slp->ns_reclen - len; 1933 m->m_len -= slp->ns_reclen - len; 1934 len = slp->ns_reclen; 1935 } else { 1936 slp->ns_flag &= ~SLP_GETSTREAM; 1937 return (EWOULDBLOCK); 1938 } 1939 } else if ((len + m->m_len) == slp->ns_reclen) { 1940 om = m; 1941 len += m->m_len; 1942 m = m->m_next; 1943 recm = slp->ns_raw; 1944 om->m_next = (struct mbuf *)0; 1945 } else { 1946 om = m; 1947 len += m->m_len; 1948 m = m->m_next; 1949 } 1950 } 1951 slp->ns_raw = m; 1952 slp->ns_cc -= len; 1953 slp->ns_reclen = 0; 1954 } else { 1955 slp->ns_flag &= ~SLP_GETSTREAM; 1956 return (0); 1957 } 1958 1959 /* 1960 * Accumulate the fragments into a record. 1961 */ 1962 mpp = &slp->ns_frag; 1963 while (*mpp) 1964 mpp = &((*mpp)->m_next); 1965 *mpp = recm; 1966 if (slp->ns_flag & SLP_LASTFRAG) { 1967 if (slp->ns_recend) 1968 slp->ns_recend->m_nextpkt = slp->ns_frag; 1969 else 1970 slp->ns_rec = slp->ns_frag; 1971 slp->ns_recend = slp->ns_frag; 1972 slp->ns_frag = (struct mbuf *)0; 1973 } 1974 } 1975 } 1976 1977 /* 1978 * Parse an RPC header. 1979 */ 1980 int 1981 nfsrv_dorec(slp, nfsd, ndp) 1982 struct nfssvc_sock *slp; 1983 struct nfsd *nfsd; 1984 struct nfsrv_descript **ndp; 1985 { 1986 struct mbuf *m, *nam; 1987 struct nfsrv_descript *nd; 1988 int error; 1989 1990 *ndp = NULL; 1991 if ((slp->ns_flag & SLP_VALID) == 0 || 1992 (m = slp->ns_rec) == (struct mbuf *)0) 1993 return (ENOBUFS); 1994 slp->ns_rec = m->m_nextpkt; 1995 if (slp->ns_rec) 1996 m->m_nextpkt = (struct mbuf *)0; 1997 else 1998 slp->ns_recend = (struct mbuf *)0; 1999 if (m->m_type == MT_SONAME) { 2000 nam = m; 2001 m = m->m_next; 2002 nam->m_next = NULL; 2003 } else 2004 nam = NULL; 2005 MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript), 2006 M_NFSRVDESC, M_WAITOK); 2007 nfs_realign(&m, 10 * NFSX_UNSIGNED); 2008 nd->nd_md = nd->nd_mrep = m; 2009 nd->nd_nam2 = nam; 2010 nd->nd_dpos = mtod(m, caddr_t); 2011 error = nfs_getreq(nd, nfsd, TRUE); 2012 if (error) { 2013 m_freem(nam); 2014 free((caddr_t)nd, M_NFSRVDESC); 2015 return (error); 2016 } 2017 *ndp = nd; 2018 nfsd->nfsd_nd = nd; 2019 return (0); 2020 } 2021 2022 2023 /* 2024 * Search for a sleeping nfsd and wake it up. 2025 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 2026 * running nfsds will go look for the work in the nfssvc_sock list. 2027 */ 2028 void 2029 nfsrv_wakenfsd(slp) 2030 struct nfssvc_sock *slp; 2031 { 2032 struct nfsd *nd; 2033 2034 if ((slp->ns_flag & SLP_VALID) == 0) 2035 return; 2036 for (nd = TAILQ_FIRST(&nfsd_head); nd != NULL; 2037 nd = TAILQ_NEXT(nd, nfsd_chain)) { 2038 if (nd->nfsd_flag & NFSD_WAITING) { 2039 nd->nfsd_flag &= ~NFSD_WAITING; 2040 if (nd->nfsd_slp) 2041 panic("nfsd wakeup"); 2042 slp->ns_sref++; 2043 nd->nfsd_slp = slp; 2044 wakeup((caddr_t)nd); 2045 return; 2046 } 2047 } 2048 slp->ns_flag |= SLP_DOREC; 2049 nfsd_head_flag |= NFSD_CHECKSLP; 2050 } 2051 #endif /* NFSSERVER */ 2052