1 /* 2 * Copyright (c) 1989, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)nfs_socket.c 7.25 (Berkeley) 03/09/92 11 */ 12 13 /* 14 * Socket operations for use by nfs 15 */ 16 17 #include "types.h" 18 #include "param.h" 19 #include "uio.h" 20 #include "proc.h" 21 #include "signal.h" 22 #include "mount.h" 23 #include "kernel.h" 24 #include "malloc.h" 25 #include "mbuf.h" 26 #include "vnode.h" 27 #include "domain.h" 28 #include "protosw.h" 29 #include "socket.h" 30 #include "socketvar.h" 31 #include "syslog.h" 32 #include "tprintf.h" 33 #include "machine/endian.h" 34 #include "netinet/in.h" 35 #include "netinet/tcp.h" 36 #ifdef ISO 37 #include "netiso/iso.h" 38 #endif 39 #include "ufs/ufs/quota.h" 40 #include "ufs/ufs/ufsmount.h" 41 #include "rpcv2.h" 42 #include "nfsv2.h" 43 #include "nfs.h" 44 #include "xdr_subs.h" 45 #include "nfsm_subs.h" 46 #include "nfsmount.h" 47 #include "nfsnode.h" 48 #include "nfsrtt.h" 49 #include "nqnfs.h" 50 51 #include "syslog.h" 52 53 #define TRUE 1 54 #define FALSE 0 55 56 int netnetnet = sizeof (struct netaddrhash); 57 /* 58 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 59 * Use the mean and mean deviation of rtt for the appropriate type of rpc 60 * for the frequent rpcs and a default for the others. 61 * The justification for doing "other" this way is that these rpcs 62 * happen so infrequently that timer est. would probably be stale. 63 * Also, since many of these rpcs are 64 * non-idempotent, a conservative timeout is desired. 65 * getattr, lookup - A+2D 66 * read, write - A+4D 67 * other - nm_timeo 68 */ 69 #define NFS_RTO(n, t) \ 70 ((t) == 0 ? (n)->nm_timeo : \ 71 ((t) < 3 ? 
\ 72 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 73 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 74 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 75 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 76 /* 77 * External data, mostly RPC constants in XDR form 78 */ 79 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 80 rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, 81 rpc_auth_kerb; 82 extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; 83 extern time_t nqnfsstarttime; 84 extern int nonidempotent[NFS_NPROCS]; 85 86 /* 87 * Maps errno values to nfs error numbers. 88 * Use NFSERR_IO as the catch all for ones not specifically defined in 89 * RFC 1094. 90 */ 91 static int nfsrv_errmap[ELAST] = { 92 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 93 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 94 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 95 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 96 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 97 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 98 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 99 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 100 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 101 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 102 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 103 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 104 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 105 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 106 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 107 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 108 NFSERR_IO, 109 }; 110 111 /* 112 * Defines which timer to use for the procnum. 
*	0 - default
 *	1 - getattr
 *	2 - lookup
 *	3 - read
 *	4 - write
 * The table is indexed by rpc procedure number, in the same order as
 * nfsrv_procs[] below.  A value of 0 means the request is not timed and
 * the mount's nm_timeo is used (see NFS_RTO()).
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
/* Multipliers applied to a timeout (or try-later delay) on successive backoffs */
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int	nfs_sbwait();
void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
void	nfs_rcvunlock(), nqnfs_serverd();
struct mbuf *nfsm_rpchead();
/* When non-zero, nfs_reply() records one entry per matched reply in nfsrtt */
int nfsrtton = 0;
struct nfsrtt nfsrtt;
struct nfsd nfsd_head;

int	nfsrv_null(),
	nfsrv_getattr(),
	nfsrv_setattr(),
	nfsrv_lookup(),
	nfsrv_readlink(),
	nfsrv_read(),
	nfsrv_write(),
	nfsrv_create(),
	nfsrv_remove(),
	nfsrv_rename(),
	nfsrv_link(),
	nfsrv_symlink(),
	nfsrv_mkdir(),
	nfsrv_rmdir(),
	nfsrv_readdir(),
	nfsrv_statfs(),
	nfsrv_noop(),
	nqnfsrv_readdirlook(),
	nqnfsrv_getlease(),
	nqnfsrv_vacated();

/*
 * Server procedure table, indexed by rpc procedure number.
 * NOTE(review): 21 initializers are given here while proct[] above lists
 * 22 values; any remaining slots of this table are zero-filled.
 */
int (*nfsrv_procs[NFS_NPROCS])() = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_noop,
	nfsrv_lookup,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_noop,
	nfsrv_write,
	nfsrv_create,
	nfsrv_remove,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_symlink,
	nfsrv_mkdir,
	nfsrv_rmdir,
	nfsrv_readdir,
	nfsrv_statfs,
	nqnfsrv_readdirlook,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
};

/* Head of the doubly linked list of outstanding client requests */
struct nfsreq nfsreqh;

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 *
 * nmp is the mount point whose nm_so is (re)created; rep, when non-NULL,
 * is the request on whose behalf the connect is done and is only used to
 * test for a pending interrupt while waiting for the connection.
 * Returns 0 on success.  On any failure the socket is torn down again
 * through nfs_disconnect() and the error is returned.
 */
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct mbuf *m;

	nmp->nm_so = (struct socket *)0;
	if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family,
		&nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
		goto bad;
	so = nmp->nm_so;
	/* Cache the protocol flags; later code tests them via nm_soflags */
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		if (error = soconnect(so, nmp->nm_nam))
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			/* Still connecting: give up if the request was interrupted */
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	/* Soft or interruptible mounts get a 5 second socket buffer timeout */
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	/* Size the socket buffers from the mount's read/write sizes */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		/* The extra u_long leaves room for the RPC record mark */
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
				* 2;
	}
	if (error = soreserve(so, sndreserve, rcvreserve))
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 *
 * Returns 0 once reconnected, or EINTR if nfs_connect() reported EINTR or
 * ERESTART.  Any other connect error is retried after sleeping on lbolt.
 */
nfs_reconnect(rep)
	register struct nfsreq *rep;
{
	register struct nfsreq *rp;
	register struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while (error = nfs_connect(nmp, rep)) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	rp = nfsreqh.r_next;
	while (rp != &nfsreqh) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
		rp = rp->r_next;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 * nm_so is cleared before the socket is shut down and closed; calling
 * this on a mount point with no socket is a no-op.
 */
void
nfs_disconnect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		soclose(so);
	}
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
365 * For the server side: 366 * - return EINTR or ERESTART if interrupted by a signal 367 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 368 * - do any cleanup required by recoverable socket errors (???) 369 */ 370 nfs_send(so, nam, top, rep) 371 register struct socket *so; 372 struct mbuf *nam; 373 register struct mbuf *top; 374 struct nfsreq *rep; 375 { 376 struct mbuf *sendnam; 377 int error, soflags, flags; 378 379 if (rep) { 380 if (rep->r_flags & R_SOFTTERM) { 381 m_freem(top); 382 return (EINTR); 383 } 384 if ((so = rep->r_nmp->nm_so) == NULL) { 385 rep->r_flags |= R_MUSTRESEND; 386 m_freem(top); 387 return (0); 388 } 389 rep->r_flags &= ~R_MUSTRESEND; 390 soflags = rep->r_nmp->nm_soflags; 391 } else 392 soflags = so->so_proto->pr_flags; 393 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 394 sendnam = (struct mbuf *)0; 395 else 396 sendnam = nam; 397 if (so->so_type == SOCK_SEQPACKET) 398 flags = MSG_EOR; 399 else 400 flags = 0; 401 402 error = sosend(so, sendnam, (struct uio *)0, top, 403 (struct mbuf *)0, flags); 404 if(error) printf("nfssnd err=%d\n",error); 405 if (error) { 406 if (rep) { 407 /* 408 * Deal with errors for the client side. 409 */ 410 if (rep->r_flags & R_SOFTTERM) 411 error = EINTR; 412 else 413 rep->r_flags |= R_MUSTRESEND; 414 } 415 416 /* 417 * Handle any recoverable (soft) socket errors here. (???) 418 */ 419 if (error != EINTR && error != ERESTART && 420 error != EWOULDBLOCK && error != EPIPE) 421 error = 0; 422 } 423 return (error); 424 } 425 426 /* 427 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 428 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 429 * Mark and consolidate the data into a new mbuf list. 430 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 431 * small mbufs. 
432 * For SOCK_STREAM we must be very careful to read an entire record once 433 * we have read any of it, even if the system call has been interrupted. 434 */ 435 nfs_receive(rep, aname, mp) 436 register struct nfsreq *rep; 437 struct mbuf **aname; 438 struct mbuf **mp; 439 { 440 register struct socket *so; 441 struct uio auio; 442 struct iovec aio; 443 register struct mbuf *m; 444 struct mbuf *control; 445 u_long len; 446 struct mbuf **getnam; 447 int error, sotype, rcvflg; 448 449 /* 450 * Set up arguments for soreceive() 451 */ 452 *mp = (struct mbuf *)0; 453 *aname = (struct mbuf *)0; 454 sotype = rep->r_nmp->nm_sotype; 455 456 /* 457 * For reliable protocols, lock against other senders/receivers 458 * in case a reconnect is necessary. 459 * For SOCK_STREAM, first get the Record Mark to find out how much 460 * more there is to get. 461 * We must lock the socket against other receivers 462 * until we have an entire rpc request/reply. 463 */ 464 if (sotype != SOCK_DGRAM) { 465 if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) 466 return (error); 467 tryagain: 468 /* 469 * Check for fatal errors and resending request. 470 */ 471 /* 472 * Ugh: If a reconnect attempt just happened, nm_so 473 * would have changed. NULL indicates a failed 474 * attempt that has essentially shut down this 475 * mount point. 
476 */ 477 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 478 nfs_sndunlock(&rep->r_nmp->nm_flag); 479 return (EINTR); 480 } 481 if ((so = rep->r_nmp->nm_so) == NULL) { 482 if (error = nfs_reconnect(rep)) { 483 nfs_sndunlock(&rep->r_nmp->nm_flag); 484 return (error); 485 } 486 goto tryagain; 487 } 488 while (rep->r_flags & R_MUSTRESEND) { 489 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 490 nfsstats.rpcretries++; 491 if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { 492 if (error == EINTR || error == ERESTART || 493 (error = nfs_reconnect(rep))) { 494 nfs_sndunlock(&rep->r_nmp->nm_flag); 495 return (error); 496 } 497 goto tryagain; 498 } 499 } 500 nfs_sndunlock(&rep->r_nmp->nm_flag); 501 if (sotype == SOCK_STREAM) { 502 aio.iov_base = (caddr_t) &len; 503 aio.iov_len = sizeof(u_long); 504 auio.uio_iov = &aio; 505 auio.uio_iovcnt = 1; 506 auio.uio_segflg = UIO_SYSSPACE; 507 auio.uio_rw = UIO_READ; 508 auio.uio_offset = 0; 509 auio.uio_resid = sizeof(u_long); 510 do { 511 rcvflg = MSG_WAITALL; 512 error = soreceive(so, (struct mbuf **)0, &auio, 513 (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 514 if (error == EWOULDBLOCK && rep) { 515 if (rep->r_flags & R_SOFTTERM) 516 return (EINTR); 517 } 518 } while (error == EWOULDBLOCK); 519 if (!error && auio.uio_resid > 0) { 520 if (rep) 521 log(LOG_INFO, 522 "short receive (%d/%d) from nfs server %s\n", 523 sizeof(u_long) - auio.uio_resid, 524 sizeof(u_long), 525 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 526 error = EPIPE; 527 } 528 if (error) 529 goto errout; 530 len = ntohl(len) & ~0x80000000; 531 /* 532 * This is SERIOUS! We are out of sync with the sender 533 * and forcing a disconnect/reconnect is all I can do. 
534 */ 535 if (len > NFS_MAXPACKET) { 536 if (rep) 537 log(LOG_ERR, "%s (%d) from nfs server %s\n", 538 "impossible packet length", 539 len, 540 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 541 error = EFBIG; 542 goto errout; 543 } 544 auio.uio_resid = len; 545 do { 546 rcvflg = MSG_WAITALL; 547 error = soreceive(so, (struct mbuf **)0, 548 &auio, mp, (struct mbuf **)0, &rcvflg); 549 } while (error == EWOULDBLOCK || error == EINTR || 550 error == ERESTART); 551 if (!error && auio.uio_resid > 0) { 552 if (rep) 553 log(LOG_INFO, 554 "short receive (%d/%d) from nfs server %s\n", 555 len - auio.uio_resid, len, 556 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 557 error = EPIPE; 558 } 559 } else { 560 /* 561 * NB: Since uio_resid is big, MSG_WAITALL is ignored 562 * and soreceive() will return when it has either a 563 * control msg or a data msg. 564 * We have no use for control msg., but must grab them 565 * and then throw them away so we know what is going 566 * on. 567 */ 568 auio.uio_resid = len = 100000000; /* Anything Big */ 569 do { 570 rcvflg = 0; 571 error = soreceive(so, (struct mbuf **)0, 572 &auio, mp, &control, &rcvflg); 573 if (control) 574 m_freem(control); 575 if (error == EWOULDBLOCK && rep) { 576 if (rep->r_flags & R_SOFTTERM) 577 return (EINTR); 578 } 579 } while (error == EWOULDBLOCK || 580 (!error && *mp == NULL && control)); 581 if ((rcvflg & MSG_EOR) == 0) 582 printf("Egad!!\n"); 583 if (!error && *mp == NULL) 584 error = EPIPE; 585 len -= auio.uio_resid; 586 } 587 errout: 588 if (error && error != EINTR && error != ERESTART) { 589 m_freem(*mp); 590 *mp = (struct mbuf *)0; 591 if (error != EPIPE && rep) 592 log(LOG_INFO, 593 "receive error %d from nfs server %s\n", 594 error, 595 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 596 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); 597 if (!error) 598 error = nfs_reconnect(rep); 599 if (!error) 600 goto tryagain; 601 } 602 } else { 603 if ((so = rep->r_nmp->nm_so) == NULL) 604 return (EACCES); 605 
if (so->so_state & SS_ISCONNECTED) 606 getnam = (struct mbuf **)0; 607 else 608 getnam = aname; 609 auio.uio_resid = len = 1000000; 610 do { 611 rcvflg = 0; 612 error = soreceive(so, getnam, &auio, mp, 613 (struct mbuf **)0, &rcvflg); 614 if (error == EWOULDBLOCK && 615 (rep->r_flags & R_SOFTTERM)) 616 return (EINTR); 617 } while (error == EWOULDBLOCK); 618 len -= auio.uio_resid; 619 } 620 if (error) { 621 m_freem(*mp); 622 *mp = (struct mbuf *)0; 623 } 624 /* 625 * Search for any mbufs that are not a multiple of 4 bytes long 626 * or with m_data not longword aligned. 627 * These could cause pointer alignment problems, so copy them to 628 * well aligned mbufs. 629 */ 630 nfs_realign(*mp, 5 * NFSX_UNSIGNED); 631 return (error); 632 } 633 634 /* 635 * Implement receipt of reply on a socket. 636 * We must search through the list of received datagrams matching them 637 * with outstanding requests using the xid, until ours is found. 638 */ 639 /* ARGSUSED */ 640 nfs_reply(myrep) 641 struct nfsreq *myrep; 642 { 643 register struct nfsreq *rep; 644 register struct nfsmount *nmp = myrep->r_nmp; 645 register long t1; 646 struct mbuf *mrep, *nam, *md; 647 u_long rxid, *tl; 648 caddr_t dpos, cp2; 649 int error; 650 651 /* 652 * Loop around until we get our own reply 653 */ 654 for (;;) { 655 /* 656 * Lock against other receivers so that I don't get stuck in 657 * sbwait() after someone else has received my reply for me. 658 * Also necessary for connection based protocols to avoid 659 * race conditions during a reconnect. 660 */ 661 if (error = nfs_rcvlock(myrep)) 662 return (error); 663 /* Already received, bye bye */ 664 if (myrep->r_mrep != NULL) { 665 nfs_rcvunlock(&nmp->nm_flag); 666 return (0); 667 } 668 /* 669 * Get the next Rpc reply off the socket 670 */ 671 error = nfs_receive(myrep, &nam, &mrep); 672 nfs_rcvunlock(&nmp->nm_flag); 673 if (error) printf("rcv err=%d\n",error); 674 if (error) { 675 676 /* 677 * Ignore routing errors on connectionless protocols?? 
678 */ 679 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 680 nmp->nm_so->so_error = 0; 681 continue; 682 } 683 return (error); 684 } 685 if (nam) 686 m_freem(nam); 687 688 /* 689 * Get the xid and check that it is an rpc reply 690 */ 691 md = mrep; 692 dpos = mtod(md, caddr_t); 693 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 694 rxid = *tl++; 695 if (*tl != rpc_reply) { 696 if (nmp->nm_flag & NFSMNT_NQNFS) { 697 if (nqnfs_callback(nmp, mrep, md, dpos)) 698 nfsstats.rpcinvalid++; 699 } else { 700 nfsstats.rpcinvalid++; 701 m_freem(mrep); 702 } 703 nfsmout: 704 continue; 705 } 706 707 /* 708 * Loop through the request list to match up the reply 709 * Iff no match, just drop the datagram 710 */ 711 rep = nfsreqh.r_next; 712 while (rep != &nfsreqh) { 713 if (rep->r_mrep == NULL && rxid == rep->r_xid) { 714 /* Found it.. */ 715 rep->r_mrep = mrep; 716 rep->r_md = md; 717 rep->r_dpos = dpos; 718 if (nfsrtton) { 719 struct rttl *rt; 720 721 rt = &nfsrtt.rttl[nfsrtt.pos]; 722 rt->proc = rep->r_procnum; 723 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 724 rt->sent = nmp->nm_sent; 725 rt->cwnd = nmp->nm_cwnd; 726 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 727 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 728 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; 729 rt->tstamp = time; 730 if (rep->r_flags & R_TIMING) 731 rt->rtt = rep->r_rtt; 732 else 733 rt->rtt = 1000000; 734 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 735 } 736 /* 737 * Update congestion window. 738 * Do the additive increase of 739 * one rpc/rtt. 740 */ 741 if (nmp->nm_cwnd <= nmp->nm_sent) { 742 nmp->nm_cwnd += 743 (NFS_CWNDSCALE * NFS_CWNDSCALE + 744 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 745 if (nmp->nm_cwnd > NFS_MAXCWND) 746 nmp->nm_cwnd = NFS_MAXCWND; 747 } 748 nmp->nm_sent -= NFS_CWNDSCALE; 749 /* 750 * Update rtt using a gain of 0.125 on the mean 751 * and a gain of 0.25 on the deviation. 
752 */ 753 if (rep->r_flags & R_TIMING) { 754 /* 755 * Since the timer resolution of 756 * NFS_HZ is so course, it can often 757 * result in r_rtt == 0. Since 758 * r_rtt == N means that the actual 759 * rtt is between N+dt and N+2-dt ticks, 760 * add 1. 761 */ 762 t1 = rep->r_rtt + 1; 763 t1 -= (NFS_SRTT(rep) >> 3); 764 NFS_SRTT(rep) += t1; 765 if (t1 < 0) 766 t1 = -t1; 767 t1 -= (NFS_SDRTT(rep) >> 2); 768 NFS_SDRTT(rep) += t1; 769 } 770 nmp->nm_timeouts = 0; 771 break; 772 } 773 rep = rep->r_next; 774 } 775 /* 776 * If not matched to a request, drop it. 777 * If it's mine, get out. 778 */ 779 if (rep == &nfsreqh) { 780 nfsstats.rpcunexpected++; 781 m_freem(mrep); 782 } else if (rep == myrep) 783 return (0); 784 } 785 } 786 787 /* 788 * nfs_request - goes something like this 789 * - fill in request struct 790 * - links it into list 791 * - calls nfs_send() for first transmit 792 * - calls nfs_receive() to get reply 793 * - break down rpc header and return with nfs reply pointed to 794 * by mrep or error 795 * nb: always frees up mreq mbuf list 796 */ 797 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) 798 struct vnode *vp; 799 struct mbuf *mrest; 800 int procnum; 801 struct proc *procp; 802 struct ucred *cred; 803 struct mbuf **mrp; 804 struct mbuf **mdp; 805 caddr_t *dposp; 806 { 807 register struct mbuf *m, *mrep; 808 register struct nfsreq *rep; 809 register u_long *tl; 810 register int i; 811 struct nfsmount *nmp; 812 struct mbuf *md, *mheadend; 813 struct nfsreq *reph; 814 struct nfsnode *tp, *np; 815 time_t reqtime, waituntil; 816 caddr_t dpos, cp2; 817 int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; 818 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; 819 u_long xid; 820 char *auth_str; 821 822 nmp = VFSTONFS(vp->v_mount); 823 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 824 rep->r_nmp = nmp; 825 rep->r_vp = vp; 826 rep->r_procp = procp; 827 rep->r_procnum = procnum; 
828 i = 0; 829 m = mrest; 830 while (m) { 831 i += m->m_len; 832 m = m->m_next; 833 } 834 mrest_len = i; 835 836 /* 837 * Get the RPC header with authorization. 838 */ 839 kerbauth: 840 auth_str = (char *)0; 841 if (nmp->nm_flag & NFSMNT_KERB) { 842 if (failed_auth) { 843 error = nfs_getauth(nmp, rep, cred, &auth_type, 844 &auth_str, &auth_len); 845 if (error) { 846 free((caddr_t)rep, M_NFSREQ); 847 m_freem(mrest); 848 return (error); 849 } 850 } else { 851 auth_type = RPCAUTH_UNIX; 852 auth_len = 5 * NFSX_UNSIGNED; 853 } 854 } else { 855 auth_type = RPCAUTH_UNIX; 856 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 857 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 858 5 * NFSX_UNSIGNED; 859 } 860 m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, 861 auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); 862 if (auth_str) 863 free(auth_str, M_TEMP); 864 865 /* 866 * For stream protocols, insert a Sun RPC Record Mark. 867 */ 868 if (nmp->nm_sotype == SOCK_STREAM) { 869 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 870 *mtod(m, u_long *) = htonl(0x80000000 | 871 (m->m_pkthdr.len - NFSX_UNSIGNED)); 872 } 873 rep->r_mreq = m; 874 rep->r_xid = xid; 875 tryagain: 876 if (nmp->nm_flag & NFSMNT_SOFT) 877 rep->r_retry = nmp->nm_retry; 878 else 879 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 880 rep->r_rtt = rep->r_rexmit = 0; 881 if (proct[procnum] > 0) 882 rep->r_flags = R_TIMING; 883 else 884 rep->r_flags = 0; 885 rep->r_mrep = NULL; 886 887 /* 888 * Do the client side RPC. 889 */ 890 nfsstats.rpcrequests++; 891 /* 892 * Chain request into list of outstanding requests. Be sure 893 * to put it LAST so timer finds oldest requests first. 
894 */ 895 s = splsoftclock(); 896 reph = &nfsreqh; 897 reph->r_prev->r_next = rep; 898 rep->r_prev = reph->r_prev; 899 reph->r_prev = rep; 900 rep->r_next = reph; 901 902 /* Get send time for nqnfs */ 903 reqtime = time.tv_sec; 904 905 /* 906 * If backing off another request or avoiding congestion, don't 907 * send this one now but let timer do it. If not timing a request, 908 * do it now. 909 */ 910 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 911 (nmp->nm_flag & NFSMNT_DUMBTIMR) || 912 nmp->nm_sent < nmp->nm_cwnd)) { 913 splx(s); 914 if (nmp->nm_soflags & PR_CONNREQUIRED) 915 error = nfs_sndlock(&nmp->nm_flag, rep); 916 if (!error) { 917 m = m_copym(m, 0, M_COPYALL, M_WAIT); 918 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 919 if (nmp->nm_soflags & PR_CONNREQUIRED) 920 nfs_sndunlock(&nmp->nm_flag); 921 } 922 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 923 nmp->nm_sent += NFS_CWNDSCALE; 924 rep->r_flags |= R_SENT; 925 } 926 } else { 927 splx(s); 928 rep->r_rtt = -1; 929 } 930 931 /* 932 * Wait for the reply from our send or the timer's. 933 */ 934 if (!error) 935 error = nfs_reply(rep); 936 937 /* 938 * RPC done, unlink the request. 939 */ 940 s = splsoftclock(); 941 rep->r_prev->r_next = rep->r_next; 942 rep->r_next->r_prev = rep->r_prev; 943 splx(s); 944 945 /* 946 * If there was a successful reply and a tprintf msg. 947 * tprintf a response. 
948 */ 949 if (!error && (rep->r_flags & R_TPRINTFMSG)) 950 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 951 "is alive again"); 952 mrep = rep->r_mrep; 953 md = rep->r_md; 954 dpos = rep->r_dpos; 955 if (error) { 956 m_freem(rep->r_mreq); 957 free((caddr_t)rep, M_NFSREQ); 958 return (error); 959 } 960 961 /* 962 * break down the rpc header and check if ok 963 */ 964 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 965 if (*tl++ == rpc_msgdenied) { 966 if (*tl == rpc_mismatch) 967 error = EOPNOTSUPP; 968 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 969 if (*tl == rpc_rejectedcred && failed_auth == 0) { 970 failed_auth++; 971 mheadend->m_next = (struct mbuf *)0; 972 m_freem(mrep); 973 m_freem(rep->r_mreq); 974 goto kerbauth; 975 } else 976 error = EAUTH; 977 } else 978 error = EACCES; 979 m_freem(mrep); 980 m_freem(rep->r_mreq); 981 free((caddr_t)rep, M_NFSREQ); 982 return (error); 983 } 984 985 /* 986 * skip over the auth_verf, someday we may want to cache auth_short's 987 * for nfs_reqhead(), but for now just dump it 988 */ 989 if (*++tl != 0) { 990 i = nfsm_rndup(fxdr_unsigned(long, *tl)); 991 nfsm_adv(i); 992 } 993 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 994 /* 0 == ok */ 995 if (*tl == 0) { 996 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 997 if (*tl != 0) { 998 error = fxdr_unsigned(int, *tl); 999 m_freem(mrep); 1000 if ((nmp->nm_flag & NFSMNT_NQNFS) && 1001 error == NQNFS_TRYLATER) { 1002 error = 0; 1003 waituntil = time.tv_sec + trylater_delay; 1004 while (time.tv_sec < waituntil) 1005 (void) tsleep((caddr_t)&lbolt, 1006 PSOCK, "nqnfstry", 0); 1007 trylater_delay *= nfs_backoff[trylater_cnt]; 1008 if (trylater_cnt < 7) 1009 trylater_cnt++; 1010 goto tryagain; 1011 } 1012 m_freem(rep->r_mreq); 1013 free((caddr_t)rep, M_NFSREQ); 1014 return (error); 1015 } 1016 1017 /* 1018 * For nqnfs, get any lease in reply 1019 */ 1020 if (nmp->nm_flag & NFSMNT_NQNFS) { 1021 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 1022 if (*tl) { 1023 np 
= VTONFS(vp); 1024 nqlflag = fxdr_unsigned(int, *tl); 1025 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); 1026 cachable = fxdr_unsigned(int, *tl++); 1027 reqtime += fxdr_unsigned(int, *tl++); 1028 if (reqtime > time.tv_sec) { 1029 if (np->n_tnext) { 1030 if (np->n_tnext == (struct nfsnode *)nmp) 1031 nmp->nm_tprev = np->n_tprev; 1032 else 1033 np->n_tnext->n_tprev = np->n_tprev; 1034 if (np->n_tprev == (struct nfsnode *)nmp) 1035 nmp->nm_tnext = np->n_tnext; 1036 else 1037 np->n_tprev->n_tnext = np->n_tnext; 1038 if (nqlflag == NQL_WRITE) 1039 np->n_flag |= NQNFSWRITE; 1040 } else if (nqlflag == NQL_READ) 1041 np->n_flag &= ~NQNFSWRITE; 1042 else 1043 np->n_flag |= NQNFSWRITE; 1044 if (cachable) 1045 np->n_flag &= ~NQNFSNONCACHE; 1046 else 1047 np->n_flag |= NQNFSNONCACHE; 1048 np->n_expiry = reqtime; 1049 fxdr_hyper(tl, &np->n_lrev); 1050 tp = nmp->nm_tprev; 1051 while (tp != (struct nfsnode *)nmp && 1052 tp->n_expiry > np->n_expiry) 1053 tp = tp->n_tprev; 1054 if (tp == (struct nfsnode *)nmp) { 1055 np->n_tnext = nmp->nm_tnext; 1056 nmp->nm_tnext = np; 1057 } else { 1058 np->n_tnext = tp->n_tnext; 1059 tp->n_tnext = np; 1060 } 1061 np->n_tprev = tp; 1062 if (np->n_tnext == (struct nfsnode *)nmp) 1063 nmp->nm_tprev = np; 1064 else 1065 np->n_tnext->n_tprev = np; 1066 } 1067 } 1068 } 1069 *mrp = mrep; 1070 *mdp = md; 1071 *dposp = dpos; 1072 m_freem(rep->r_mreq); 1073 FREE((caddr_t)rep, M_NFSREQ); 1074 return (0); 1075 } 1076 m_freem(mrep); 1077 m_freem(rep->r_mreq); 1078 free((caddr_t)rep, M_NFSREQ); 1079 error = EPROTONOSUPPORT; 1080 nfsmout: 1081 return (error); 1082 } 1083 1084 /* 1085 * Generate the rpc reply header 1086 * siz arg. 
is used to decide if adding a cluster is worthwhile 1087 */ 1088 nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) 1089 int siz; 1090 struct nfsd *nd; 1091 int err; 1092 int cache; 1093 u_quad_t *frev; 1094 struct mbuf **mrq; 1095 struct mbuf **mbp; 1096 caddr_t *bposp; 1097 { 1098 register u_long *tl; 1099 register struct mbuf *mreq; 1100 caddr_t bpos; 1101 struct mbuf *mb, *mb2; 1102 1103 MGETHDR(mreq, M_WAIT, MT_DATA); 1104 mb = mreq; 1105 /* 1106 * If this is a big reply, use a cluster else 1107 * try and leave leading space for the lower level headers. 1108 */ 1109 siz += RPC_REPLYSIZ; 1110 if (siz >= MINCLSIZE) { 1111 MCLGET(mreq, M_WAIT); 1112 } else 1113 mreq->m_data += max_hdr; 1114 tl = mtod(mreq, u_long *); 1115 mreq->m_len = 6*NFSX_UNSIGNED; 1116 bpos = ((caddr_t)tl)+mreq->m_len; 1117 *tl++ = nd->nd_retxid; 1118 *tl++ = rpc_reply; 1119 if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { 1120 *tl++ = rpc_msgdenied; 1121 if (err == NQNFS_AUTHERR) { 1122 *tl++ = rpc_autherr; 1123 *tl = rpc_rejectedcred; 1124 mreq->m_len -= NFSX_UNSIGNED; 1125 bpos -= NFSX_UNSIGNED; 1126 } else { 1127 *tl++ = rpc_mismatch; 1128 *tl++ = txdr_unsigned(2); 1129 *tl = txdr_unsigned(2); 1130 } 1131 } else { 1132 *tl++ = rpc_msgaccepted; 1133 *tl++ = 0; 1134 *tl++ = 0; 1135 switch (err) { 1136 case EPROGUNAVAIL: 1137 *tl = txdr_unsigned(RPC_PROGUNAVAIL); 1138 break; 1139 case EPROGMISMATCH: 1140 *tl = txdr_unsigned(RPC_PROGMISMATCH); 1141 nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 1142 *tl++ = txdr_unsigned(2); 1143 *tl = txdr_unsigned(2); /* someday 3 */ 1144 break; 1145 case EPROCUNAVAIL: 1146 *tl = txdr_unsigned(RPC_PROCUNAVAIL); 1147 break; 1148 default: 1149 *tl = 0; 1150 if (err != VNOVAL) { 1151 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1152 if (err) 1153 *tl = txdr_unsigned(nfsrv_errmap[err - 1]); 1154 else 1155 *tl = 0; 1156 } 1157 break; 1158 }; 1159 } 1160 1161 /* 1162 * For nqnfs, piggyback lease as requested. 
1163 */ 1164 if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { 1165 if (nd->nd_nqlflag) { 1166 nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); 1167 *tl++ = txdr_unsigned(nd->nd_nqlflag); 1168 *tl++ = txdr_unsigned(cache); 1169 *tl++ = txdr_unsigned(nd->nd_duration); 1170 txdr_hyper(frev, tl); 1171 } else { 1172 if (nd->nd_nqlflag != 0) 1173 panic("nqreph"); 1174 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1175 *tl = 0; 1176 } 1177 } 1178 *mrq = mreq; 1179 *mbp = mb; 1180 *bposp = bpos; 1181 if (err != 0 && err != VNOVAL) 1182 nfsstats.srvrpc_errs++; 1183 return (0); 1184 } 1185 1186 /* 1187 * Nfs timer routine 1188 * Scan the nfsreq list and retranmit any requests that have timed out 1189 * To avoid retransmission attempts on STREAM sockets (in the future) make 1190 * sure to set the r_retry field to 0 (implies nm_retry == 0). 1191 */ 1192 nfs_timer() 1193 { 1194 register struct nfsreq *rep; 1195 register struct mbuf *m; 1196 register struct socket *so; 1197 register struct nfsmount *nmp; 1198 register int timeo; 1199 static long lasttime = 0; 1200 int s, error; 1201 1202 s = splnet(); 1203 for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 1204 nmp = rep->r_nmp; 1205 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 1206 continue; 1207 if (nfs_sigintr(nmp, rep, rep->r_procp)) { 1208 rep->r_flags |= R_SOFTTERM; 1209 continue; 1210 } 1211 if (rep->r_rtt >= 0) { 1212 rep->r_rtt++; 1213 if (nmp->nm_flag & NFSMNT_DUMBTIMR) 1214 timeo = nmp->nm_timeo; 1215 else 1216 timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 1217 if (nmp->nm_timeouts > 0) 1218 timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 1219 if (rep->r_rtt <= timeo) 1220 continue; 1221 if (nmp->nm_timeouts < 8) 1222 nmp->nm_timeouts++; 1223 } 1224 /* 1225 * Check for server not responding 1226 */ 1227 if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1228 rep->r_rexmit > nmp->nm_deadthresh) { 1229 nfs_msg(rep->r_procp, 1230 nmp->nm_mountp->mnt_stat.f_mntfromname, 1231 "not responding"); 1232 rep->r_flags |= R_TPRINTFMSG; 
1233 } 1234 if (rep->r_rexmit >= rep->r_retry) { /* too many */ 1235 nfsstats.rpctimeouts++; 1236 rep->r_flags |= R_SOFTTERM; 1237 continue; 1238 } 1239 if (nmp->nm_sotype != SOCK_DGRAM) { 1240 if (++rep->r_rexmit > NFS_MAXREXMIT) 1241 rep->r_rexmit = NFS_MAXREXMIT; 1242 continue; 1243 } 1244 if ((so = nmp->nm_so) == NULL) 1245 continue; 1246 1247 /* 1248 * If there is enough space and the window allows.. 1249 * Resend it 1250 * Set r_rtt to -1 in case we fail to send it now. 1251 */ 1252 rep->r_rtt = -1; 1253 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 1254 ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 1255 (rep->r_flags & R_SENT) || 1256 nmp->nm_sent < nmp->nm_cwnd) && 1257 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 1258 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 1259 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1260 (struct mbuf *)0, (struct mbuf *)0); 1261 else 1262 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1263 nmp->nm_nam, (struct mbuf *)0); 1264 if (error) { 1265 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 1266 so->so_error = 0; 1267 } else { 1268 /* 1269 * Iff first send, start timing 1270 * else turn timing off, backoff timer 1271 * and divide congestion window by 2. 1272 */ 1273 if (rep->r_flags & R_SENT) { 1274 rep->r_flags &= ~R_TIMING; 1275 if (++rep->r_rexmit > NFS_MAXREXMIT) 1276 rep->r_rexmit = NFS_MAXREXMIT; 1277 nmp->nm_cwnd >>= 1; 1278 if (nmp->nm_cwnd < NFS_CWNDSCALE) 1279 nmp->nm_cwnd = NFS_CWNDSCALE; 1280 nfsstats.rpcretries++; 1281 } else { 1282 rep->r_flags |= R_SENT; 1283 nmp->nm_sent += NFS_CWNDSCALE; 1284 } 1285 rep->r_rtt = 0; 1286 } 1287 } 1288 } 1289 1290 /* 1291 * Call the nqnfs server timer once a second to handle leases. 1292 */ 1293 if (lasttime != time.tv_sec) { 1294 lasttime = time.tv_sec; 1295 nqnfs_serverd(); 1296 } 1297 splx(s); 1298 timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 1299 } 1300 1301 /* 1302 * Test for a termination condition pending on the process. 
1303 * This is used for NFSMNT_INT mounts. 1304 */ 1305 nfs_sigintr(nmp, rep, p) 1306 struct nfsmount *nmp; 1307 struct nfsreq *rep; 1308 register struct proc *p; 1309 { 1310 1311 if (rep && (rep->r_flags & R_SOFTTERM)) 1312 return (EINTR); 1313 if (!(nmp->nm_flag & NFSMNT_INT)) 1314 return (0); 1315 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 1316 NFSINT_SIGMASK)) 1317 return (EINTR); 1318 return (0); 1319 } 1320 1321 /* 1322 * Lock a socket against others. 1323 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 1324 * and also to avoid race conditions between the processes with nfs requests 1325 * in progress when a reconnect is necessary. 1326 */ 1327 nfs_sndlock(flagp, rep) 1328 register int *flagp; 1329 struct nfsreq *rep; 1330 { 1331 struct proc *p; 1332 1333 if (rep) 1334 p = rep->r_procp; 1335 else 1336 p = (struct proc *)0; 1337 while (*flagp & NFSMNT_SNDLOCK) { 1338 if (nfs_sigintr(rep->r_nmp, rep, p)) 1339 return (EINTR); 1340 *flagp |= NFSMNT_WANTSND; 1341 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0); 1342 } 1343 *flagp |= NFSMNT_SNDLOCK; 1344 return (0); 1345 } 1346 1347 /* 1348 * Unlock the stream socket for others. 1349 */ 1350 void 1351 nfs_sndunlock(flagp) 1352 register int *flagp; 1353 { 1354 1355 if ((*flagp & NFSMNT_SNDLOCK) == 0) 1356 panic("nfs sndunlock"); 1357 *flagp &= ~NFSMNT_SNDLOCK; 1358 if (*flagp & NFSMNT_WANTSND) { 1359 *flagp &= ~NFSMNT_WANTSND; 1360 wakeup((caddr_t)flagp); 1361 } 1362 } 1363 1364 nfs_rcvlock(rep) 1365 register struct nfsreq *rep; 1366 { 1367 register int *flagp = &rep->r_nmp->nm_flag; 1368 1369 while (*flagp & NFSMNT_RCVLOCK) { 1370 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) 1371 return (EINTR); 1372 *flagp |= NFSMNT_WANTRCV; 1373 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0); 1374 } 1375 *flagp |= NFSMNT_RCVLOCK; 1376 return (0); 1377 } 1378 1379 /* 1380 * Unlock the stream socket for others. 
1381 */ 1382 void 1383 nfs_rcvunlock(flagp) 1384 register int *flagp; 1385 { 1386 1387 if ((*flagp & NFSMNT_RCVLOCK) == 0) 1388 panic("nfs rcvunlock"); 1389 *flagp &= ~NFSMNT_RCVLOCK; 1390 if (*flagp & NFSMNT_WANTRCV) { 1391 *flagp &= ~NFSMNT_WANTRCV; 1392 wakeup((caddr_t)flagp); 1393 } 1394 } 1395 1396 /* 1397 * This function compares two net addresses by family and returns TRUE 1398 * if they are the same host. 1399 * If there is any doubt, return FALSE. 1400 * The AF_INET family is handled as a special case so that address mbufs 1401 * don't need to be saved to store "struct in_addr", which is only 4 bytes. 1402 */ 1403 nfs_netaddr_match(family, haddr, hmask, nam) 1404 int family; 1405 union nethostaddr *haddr; 1406 union nethostaddr *hmask; 1407 struct mbuf *nam; 1408 { 1409 register struct sockaddr_in *inetaddr; 1410 #ifdef ISO 1411 register struct sockaddr_iso *isoaddr1, *isoaddr2; 1412 #endif 1413 1414 1415 switch (family) { 1416 case AF_INET: 1417 inetaddr = mtod(nam, struct sockaddr_in *); 1418 if (inetaddr->sin_family != AF_INET) 1419 return (0); 1420 if (hmask) { 1421 if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) == 1422 (haddr->had_inetaddr & hmask->had_inetaddr)) 1423 return (1); 1424 } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 1425 return (1); 1426 break; 1427 #ifdef ISO 1428 case AF_ISO: 1429 isoaddr1 = mtod(nam, struct sockaddr_iso *); 1430 if (isoaddr1->siso_family != AF_ISO) 1431 return (0); 1432 isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 1433 if (isoaddr1->siso_nlen > 0 && 1434 isoaddr1->siso_nlen == isoaddr2->siso_nlen && 1435 SAME_ISOADDR(isoaddr1, isoaddr2)) 1436 return (1); 1437 break; 1438 #endif /* ISO */ 1439 default: 1440 break; 1441 }; 1442 return (0); 1443 } 1444 1445 /* 1446 * Build hash lists of net addresses and hang them off the mount point. 1447 * Called by ufs_mount() to set up the lists of export addresses. 
1448 */ 1449 hang_addrlist(mp, argp) 1450 struct mount *mp; 1451 struct ufs_args *argp; 1452 { 1453 register struct netaddrhash *np, **hnp; 1454 register int i; 1455 struct ufsmount *ump; 1456 struct sockaddr *saddr; 1457 struct mbuf *nam, *msk = (struct mbuf *)0; 1458 union nethostaddr netmsk; 1459 int error; 1460 1461 if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen, 1462 MT_SONAME)) 1463 return (error); 1464 saddr = mtod(nam, struct sockaddr *); 1465 ump = VFSTOUFS(mp); 1466 if (saddr->sa_family == AF_INET && 1467 ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) { 1468 m_freem(nam); 1469 if (mp->mnt_flag & MNT_DEFEXPORTED) 1470 return (EPERM); 1471 np = &ump->um_defexported; 1472 np->neth_exflags = argp->exflags; 1473 np->neth_anon = argp->anon; 1474 np->neth_anon.cr_ref = 1; 1475 mp->mnt_flag |= MNT_DEFEXPORTED; 1476 return (0); 1477 } 1478 if (argp->msklen > 0) { 1479 if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen, 1480 MT_SONAME)) { 1481 m_freem(nam); 1482 return (error); 1483 } 1484 1485 /* 1486 * Scan all the hash lists to check against duplications. 1487 * For the net list, try both masks to catch a subnet 1488 * of another network. 
1489 */ 1490 hnp = &ump->um_netaddr[NETMASK_HASH]; 1491 np = *hnp; 1492 if (saddr->sa_family == AF_INET) 1493 netmsk.had_inetaddr = 1494 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr; 1495 else 1496 netmsk.had_nam = msk; 1497 while (np) { 1498 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1499 &np->neth_hmask, nam) || 1500 nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1501 &netmsk, nam)) { 1502 m_freem(nam); 1503 m_freem(msk); 1504 return (EPERM); 1505 } 1506 np = np->neth_next; 1507 } 1508 for (i = 0; i < NETHASHSZ; i++) { 1509 np = ump->um_netaddr[i]; 1510 while (np) { 1511 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1512 &netmsk, nam)) { 1513 m_freem(nam); 1514 m_freem(msk); 1515 return (EPERM); 1516 } 1517 np = np->neth_next; 1518 } 1519 } 1520 } else { 1521 hnp = &ump->um_netaddr[NETADDRHASH(saddr)]; 1522 np = ump->um_netaddr[NETMASK_HASH]; 1523 while (np) { 1524 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1525 &np->neth_hmask, nam)) { 1526 m_freem(nam); 1527 return (EPERM); 1528 } 1529 np = np->neth_next; 1530 } 1531 np = *hnp; 1532 while (np) { 1533 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1534 (union nethostaddr *)0, nam)) { 1535 m_freem(nam); 1536 return (EPERM); 1537 } 1538 np = np->neth_next; 1539 } 1540 } 1541 np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR, 1542 M_WAITOK); 1543 np->neth_family = saddr->sa_family; 1544 if (saddr->sa_family == AF_INET) { 1545 np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr; 1546 m_freem(nam); 1547 if (msk) { 1548 np->neth_inetmask = netmsk.had_inetaddr; 1549 m_freem(msk); 1550 if (np->neth_inetaddr &~ np->neth_inetmask) 1551 return (EPERM); 1552 } else 1553 np->neth_inetmask = 0xffffffff; 1554 } else { 1555 np->neth_nam = nam; 1556 np->neth_msk = msk; 1557 } 1558 np->neth_exflags = argp->exflags; 1559 np->neth_anon = argp->anon; 1560 np->neth_anon.cr_ref = 1; 1561 np->neth_next = *hnp; 1562 *hnp = np; 1563 return (0); 
1564 } 1565 1566 /* 1567 * Free the net address hash lists that are hanging off the mount points. 1568 */ 1569 free_addrlist(ump) 1570 struct ufsmount *ump; 1571 { 1572 register struct netaddrhash *np, *onp; 1573 register int i; 1574 1575 for (i = 0; i <= NETHASHSZ; i++) { 1576 np = ump->um_netaddr[i]; 1577 ump->um_netaddr[i] = (struct netaddrhash *)0; 1578 while (np) { 1579 onp = np; 1580 np = np->neth_next; 1581 if (onp->neth_family != AF_INET) { 1582 m_freem(onp->neth_nam); 1583 m_freem(onp->neth_msk); 1584 } 1585 free((caddr_t)onp, M_NETADDR); 1586 } 1587 } 1588 } 1589 1590 /* 1591 * Generate a hash code for an iso host address. Used by NETADDRHASH() for 1592 * iso addresses. 1593 */ 1594 iso_addrhash(saddr) 1595 struct sockaddr *saddr; 1596 { 1597 #ifdef ISO 1598 register struct sockaddr_iso *siso; 1599 register int i, sum; 1600 1601 sum = 0; 1602 for (i = 0; i < siso->siso_nlen; i++) 1603 sum += siso->siso_data[i]; 1604 return (sum & (NETHASHSZ - 1)); 1605 #else 1606 return (0); 1607 #endif /* ISO */ 1608 } 1609 1610 /* 1611 * Check for badly aligned mbuf data areas and 1612 * realign data in an mbuf list by copying the data areas up, as required. 1613 */ 1614 void 1615 nfs_realign(m, hsiz) 1616 register struct mbuf *m; 1617 int hsiz; 1618 { 1619 register struct mbuf *m2; 1620 register int siz, mlen, olen; 1621 register caddr_t tcp, fcp; 1622 struct mbuf *mnew; 1623 1624 while (m) { 1625 /* 1626 * This never happens for UDP, rarely happens for TCP 1627 * but frequently happens for iso transport. 1628 */ 1629 if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { 1630 olen = m->m_len; 1631 fcp = mtod(m, caddr_t); 1632 m->m_flags &= ~M_PKTHDR; 1633 if (m->m_flags & M_EXT) 1634 m->m_data = m->m_ext.ext_buf; 1635 else 1636 m->m_data = m->m_dat; 1637 m->m_len = 0; 1638 tcp = mtod(m, caddr_t); 1639 mnew = m; 1640 m2 = m->m_next; 1641 1642 /* 1643 * If possible, only put the first invariant part 1644 * of the RPC header in the first mbuf. 
1645 */ 1646 if (olen <= hsiz) 1647 mlen = hsiz; 1648 else 1649 mlen = M_TRAILINGSPACE(m); 1650 1651 /* 1652 * Loop through the mbuf list consolidating data. 1653 */ 1654 while (m) { 1655 while (olen > 0) { 1656 if (mlen == 0) { 1657 m2->m_flags &= ~M_PKTHDR; 1658 if (m2->m_flags & M_EXT) 1659 m2->m_data = m2->m_ext.ext_buf; 1660 else 1661 m2->m_data = m2->m_dat; 1662 m2->m_len = 0; 1663 mlen = M_TRAILINGSPACE(m2); 1664 tcp = mtod(m2, caddr_t); 1665 mnew = m2; 1666 m2 = m2->m_next; 1667 } 1668 siz = MIN(mlen, olen); 1669 if (tcp != fcp) 1670 bcopy(fcp, tcp, siz); 1671 mnew->m_len += siz; 1672 mlen -= siz; 1673 olen -= siz; 1674 tcp += siz; 1675 fcp += siz; 1676 } 1677 m = m->m_next; 1678 if (m) { 1679 olen = m->m_len; 1680 fcp = mtod(m, caddr_t); 1681 } 1682 } 1683 1684 /* 1685 * Finally, set m_len == 0 for any trailing mbufs that have 1686 * been copied out of. 1687 */ 1688 while (m2) { 1689 m2->m_len = 0; 1690 m2 = m2->m_next; 1691 } 1692 return; 1693 } 1694 m = m->m_next; 1695 } 1696 } 1697 1698 /* 1699 * Socket upcall routine for the nfsd sockets. 1700 * The caddr_t arg is a pointer to the "struct nfssvc_sock". 1701 * Essentially do as much as possible non-blocking, else punt and it will 1702 * be called with M_WAIT from an nfsd. 1703 */ 1704 void 1705 nfsrv_rcv(so, arg, waitflag) 1706 struct socket *so; 1707 caddr_t arg; 1708 int waitflag; 1709 { 1710 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 1711 register struct mbuf *m; 1712 struct mbuf *mp, *nam; 1713 struct uio auio; 1714 int flags, error; 1715 1716 if ((slp->ns_flag & SLP_VALID) == 0) 1717 return; 1718 #ifdef notdef 1719 /* 1720 * Define this to test for nfsds handling this under heavy load. 
1721 */ 1722 if (waitflag == M_DONTWAIT) { 1723 slp->ns_flag |= SLP_NEEDQ; goto dorecs; 1724 } 1725 #endif 1726 if (so->so_type == SOCK_STREAM) { 1727 /* 1728 * If there are already records on the queue, defer soreceive() 1729 * to an nfsd so that there is feedback to the TCP layer that 1730 * the nfs servers are heavily loaded. 1731 */ 1732 if (slp->ns_rec && waitflag == M_DONTWAIT) { 1733 slp->ns_flag |= SLP_NEEDQ; 1734 goto dorecs; 1735 } 1736 1737 /* 1738 * Do soreceive(). 1739 */ 1740 auio.uio_resid = 1000000000; 1741 flags = MSG_DONTWAIT; 1742 error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); 1743 if (error || mp == (struct mbuf *)0) { 1744 if (error == EWOULDBLOCK) 1745 slp->ns_flag |= SLP_NEEDQ; 1746 else 1747 slp->ns_flag |= SLP_DISCONN; 1748 goto dorecs; 1749 } 1750 m = mp; 1751 if (slp->ns_rawend) { 1752 slp->ns_rawend->m_next = m; 1753 slp->ns_cc += 1000000000 - auio.uio_resid; 1754 } else { 1755 slp->ns_raw = m; 1756 slp->ns_cc = 1000000000 - auio.uio_resid; 1757 } 1758 while (m->m_next) 1759 m = m->m_next; 1760 slp->ns_rawend = m; 1761 1762 /* 1763 * Now try and parse record(s) out of the raw stream data. 
1764 */ 1765 if (error = nfsrv_getstream(slp, waitflag)) { 1766 if (error == EPERM) 1767 slp->ns_flag |= SLP_DISCONN; 1768 else 1769 slp->ns_flag |= SLP_NEEDQ; 1770 } 1771 } else { 1772 do { 1773 auio.uio_resid = 1000000000; 1774 flags = MSG_DONTWAIT; 1775 error = soreceive(so, &nam, &auio, &mp, 1776 (struct mbuf **)0, &flags); 1777 if (mp) { 1778 nfs_realign(mp, 10 * NFSX_UNSIGNED); 1779 if (nam) { 1780 m = nam; 1781 m->m_next = mp; 1782 } else 1783 m = mp; 1784 if (slp->ns_recend) 1785 slp->ns_recend->m_nextpkt = m; 1786 else 1787 slp->ns_rec = m; 1788 slp->ns_recend = m; 1789 m->m_nextpkt = (struct mbuf *)0; 1790 } 1791 if (error) { 1792 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 1793 && error != EWOULDBLOCK) { 1794 slp->ns_flag |= SLP_DISCONN; 1795 goto dorecs; 1796 } 1797 } 1798 } while (mp); 1799 } 1800 1801 /* 1802 * Now try and process the request records, non-blocking. 1803 */ 1804 dorecs: 1805 if (waitflag == M_DONTWAIT && 1806 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) 1807 nfsrv_wakenfsd(slp); 1808 } 1809 1810 /* 1811 * Try and extract an RPC request from the mbuf data list received on a 1812 * stream socket. The "waitflag" argument indicates whether or not it 1813 * can sleep. 
1814 */ 1815 nfsrv_getstream(slp, waitflag) 1816 register struct nfssvc_sock *slp; 1817 int waitflag; 1818 { 1819 register struct mbuf *m; 1820 register char *cp1, *cp2; 1821 register int len; 1822 struct mbuf *om, *m2, *recm; 1823 u_long recmark; 1824 1825 if (slp->ns_flag & SLP_GETSTREAM) 1826 panic("nfs getstream"); 1827 slp->ns_flag |= SLP_GETSTREAM; 1828 for (;;) { 1829 if (slp->ns_reclen == 0) { 1830 if (slp->ns_cc < NFSX_UNSIGNED) { 1831 slp->ns_flag &= ~SLP_GETSTREAM; 1832 return (0); 1833 } 1834 m = slp->ns_raw; 1835 if (m->m_len >= NFSX_UNSIGNED) { 1836 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); 1837 m->m_data += NFSX_UNSIGNED; 1838 m->m_len -= NFSX_UNSIGNED; 1839 } else { 1840 cp1 = (caddr_t)&recmark; 1841 cp2 = mtod(m, caddr_t); 1842 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 1843 while (m->m_len == 0) { 1844 m = m->m_next; 1845 cp2 = mtod(m, caddr_t); 1846 } 1847 *cp1++ = *cp2++; 1848 m->m_data++; 1849 m->m_len--; 1850 } 1851 } 1852 slp->ns_cc -= NFSX_UNSIGNED; 1853 slp->ns_reclen = ntohl(recmark) & ~0x80000000; 1854 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { 1855 slp->ns_flag &= ~SLP_GETSTREAM; 1856 return (EPERM); 1857 } 1858 } 1859 1860 /* 1861 * Now get the record part. 
1862 */ 1863 if (slp->ns_cc == slp->ns_reclen) { 1864 recm = slp->ns_raw; 1865 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 1866 slp->ns_cc = slp->ns_reclen = 0; 1867 } else if (slp->ns_cc > slp->ns_reclen) { 1868 len = 0; 1869 m = slp->ns_raw; 1870 om = (struct mbuf *)0; 1871 while (len < slp->ns_reclen) { 1872 if ((len + m->m_len) > slp->ns_reclen) { 1873 m2 = m_copym(m, 0, slp->ns_reclen - len, 1874 waitflag); 1875 if (m2) { 1876 if (om) { 1877 om->m_next = m2; 1878 recm = slp->ns_raw; 1879 } else 1880 recm = m2; 1881 m->m_data += slp->ns_reclen - len; 1882 m->m_len -= slp->ns_reclen - len; 1883 len = slp->ns_reclen; 1884 } else { 1885 slp->ns_flag &= ~SLP_GETSTREAM; 1886 return (EWOULDBLOCK); 1887 } 1888 } else if ((len + m->m_len) == slp->ns_reclen) { 1889 om = m; 1890 len += m->m_len; 1891 m = m->m_next; 1892 recm = slp->ns_raw; 1893 om->m_next = (struct mbuf *)0; 1894 } else { 1895 om = m; 1896 len += m->m_len; 1897 m = m->m_next; 1898 } 1899 } 1900 slp->ns_raw = m; 1901 slp->ns_cc -= len; 1902 slp->ns_reclen = 0; 1903 } else { 1904 slp->ns_flag &= ~SLP_GETSTREAM; 1905 return (0); 1906 } 1907 nfs_realign(recm, 10 * NFSX_UNSIGNED); 1908 if (slp->ns_recend) 1909 slp->ns_recend->m_nextpkt = recm; 1910 else 1911 slp->ns_rec = recm; 1912 slp->ns_recend = recm; 1913 } 1914 } 1915 1916 /* 1917 * Parse an RPC header. 
1918 */ 1919 nfsrv_dorec(slp, nd) 1920 register struct nfssvc_sock *slp; 1921 register struct nfsd *nd; 1922 { 1923 register struct mbuf *m; 1924 int error; 1925 1926 if ((slp->ns_flag & SLP_VALID) == 0 || 1927 (m = slp->ns_rec) == (struct mbuf *)0) 1928 return (ENOBUFS); 1929 if (slp->ns_rec = m->m_nextpkt) 1930 m->m_nextpkt = (struct mbuf *)0; 1931 else 1932 slp->ns_recend = (struct mbuf *)0; 1933 if (m->m_type == MT_SONAME) { 1934 nd->nd_nam = m; 1935 nd->nd_md = nd->nd_mrep = m->m_next; 1936 m->m_next = (struct mbuf *)0; 1937 } else { 1938 nd->nd_nam = (struct mbuf *)0; 1939 nd->nd_md = nd->nd_mrep = m; 1940 } 1941 nd->nd_dpos = mtod(nd->nd_md, caddr_t); 1942 if (error = nfs_getreq(nd, TRUE)) { 1943 m_freem(nd->nd_nam); 1944 return (error); 1945 } 1946 return (0); 1947 } 1948 1949 /* 1950 * Parse an RPC request 1951 * - verify it 1952 * - fill in the cred struct. 1953 */ 1954 nfs_getreq(nd, has_header) 1955 register struct nfsd *nd; 1956 int has_header; 1957 { 1958 register int len, i; 1959 register u_long *tl; 1960 register long t1; 1961 struct uio uio; 1962 struct iovec iov; 1963 caddr_t dpos, cp2; 1964 u_long nfsvers, auth_type; 1965 int error = 0, nqnfs = 0; 1966 struct mbuf *mrep, *md; 1967 1968 mrep = nd->nd_mrep; 1969 md = nd->nd_md; 1970 dpos = nd->nd_dpos; 1971 if (has_header) { 1972 nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); 1973 nd->nd_retxid = *tl++; 1974 if (*tl++ != rpc_call) { 1975 m_freem(mrep); 1976 return (EBADRPC); 1977 } 1978 } else { 1979 nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); 1980 } 1981 nd->nd_repstat = 0; 1982 if (*tl++ != rpc_vers) { 1983 nd->nd_repstat = ERPCMISMATCH; 1984 nd->nd_procnum = NFSPROC_NOOP; 1985 return (0); 1986 } 1987 nfsvers = nfs_vers; 1988 if (*tl != nfs_prog) { 1989 if (*tl == nqnfs_prog) { 1990 nqnfs++; 1991 nfsvers = nqnfs_vers; 1992 } else { 1993 nd->nd_repstat = EPROGUNAVAIL; 1994 nd->nd_procnum = NFSPROC_NOOP; 1995 return (0); 1996 } 1997 } 1998 tl++; 1999 if (*tl++ != nfsvers) { 2000 nd->nd_repstat = 
EPROGMISMATCH; 2001 nd->nd_procnum = NFSPROC_NOOP; 2002 return (0); 2003 } 2004 nd->nd_procnum = fxdr_unsigned(u_long, *tl++); 2005 if (nd->nd_procnum == NFSPROC_NULL) 2006 return (0); 2007 if (nd->nd_procnum >= NFS_NPROCS || 2008 (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || 2009 (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { 2010 nd->nd_repstat = EPROCUNAVAIL; 2011 nd->nd_procnum = NFSPROC_NOOP; 2012 return (0); 2013 } 2014 auth_type = *tl++; 2015 len = fxdr_unsigned(int, *tl++); 2016 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2017 m_freem(mrep); 2018 return (EBADRPC); 2019 } 2020 2021 /* 2022 * Handle auth_unix or auth_kerb. 2023 */ 2024 if (auth_type == rpc_auth_unix) { 2025 len = fxdr_unsigned(int, *++tl); 2026 if (len < 0 || len > NFS_MAXNAMLEN) { 2027 m_freem(mrep); 2028 return (EBADRPC); 2029 } 2030 nfsm_adv(nfsm_rndup(len)); 2031 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 2032 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2033 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); 2034 len = fxdr_unsigned(int, *tl); 2035 if (len < 0 || len > RPCAUTH_UNIXGIDS) { 2036 m_freem(mrep); 2037 return (EBADRPC); 2038 } 2039 nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 2040 for (i = 1; i <= len; i++) 2041 if (i < NGROUPS) 2042 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 2043 else 2044 tl++; 2045 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? 
NGROUPS : (len + 1); 2046 } else if (auth_type == rpc_auth_kerb) { 2047 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2048 nd->nd_authlen = fxdr_unsigned(int, *tl); 2049 iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen); 2050 if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) { 2051 m_freem(mrep); 2052 return (EBADRPC); 2053 } 2054 uio.uio_offset = 0; 2055 uio.uio_iov = &iov; 2056 uio.uio_iovcnt = 1; 2057 uio.uio_segflg = UIO_SYSSPACE; 2058 iov.iov_base = (caddr_t)nd->nd_authstr; 2059 nfsm_mtouio(&uio, uio.uio_resid); 2060 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 2061 nd->nd_flag |= NFSD_NEEDAUTH; 2062 } 2063 2064 /* 2065 * Do we have any use for the verifier. 2066 * According to the "Remote Procedure Call Protocol Spec." it 2067 * should be AUTH_NULL, but some clients make it AUTH_UNIX? 2068 * For now, just skip over it 2069 */ 2070 len = fxdr_unsigned(int, *++tl); 2071 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2072 m_freem(mrep); 2073 return (EBADRPC); 2074 } 2075 if (len > 0) { 2076 nfsm_adv(nfsm_rndup(len)); 2077 } 2078 2079 /* 2080 * For nqnfs, get piggybacked lease request. 2081 */ 2082 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { 2083 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2084 nd->nd_nqlflag = fxdr_unsigned(int, *tl); 2085 if (nd->nd_nqlflag) { 2086 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2087 nd->nd_duration = fxdr_unsigned(int, *tl); 2088 } else 2089 nd->nd_duration = NQ_MINLEASE; 2090 } else { 2091 nd->nd_nqlflag = NQL_NOVAL; 2092 nd->nd_duration = NQ_MINLEASE; 2093 } 2094 nd->nd_md = md; 2095 nd->nd_dpos = dpos; 2096 return (0); 2097 nfsmout: 2098 return (error); 2099 } 2100 2101 /* 2102 * Search for a sleeping nfsd and wake it up. 2103 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 2104 * running nfsds will go look for the work in the nfssvc_sock list. 
2105 */ 2106 void 2107 nfsrv_wakenfsd(slp) 2108 struct nfssvc_sock *slp; 2109 { 2110 register struct nfsd *nd = nfsd_head.nd_next; 2111 2112 if ((slp->ns_flag & SLP_VALID) == 0) 2113 return; 2114 while (nd != (struct nfsd *)&nfsd_head) { 2115 if (nd->nd_flag & NFSD_WAITING) { 2116 nd->nd_flag &= ~NFSD_WAITING; 2117 if (nd->nd_slp) 2118 panic("nfsd wakeup"); 2119 nd->nd_slp = slp; 2120 wakeup((caddr_t)nd); 2121 return; 2122 } 2123 nd = nd->nd_next; 2124 } 2125 slp->ns_flag |= SLP_DOREC; 2126 nfsd_head.nd_flag |= NFSD_CHECKSLP; 2127 } 2128 2129 nfs_msg(p, server, msg) 2130 struct proc *p; 2131 char *server, *msg; 2132 { 2133 tpr_t tpr; 2134 2135 if (p) 2136 tpr = tprintf_open(p); 2137 else 2138 tpr = NULL; 2139 tprintf(tpr, "nfs server %s: %s\n", server, msg); 2140 tprintf_close(tpr); 2141 } 2142