1 /* 2 * Copyright (c) 1989, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)nfs_socket.c 7.27 (Berkeley) 03/13/92 11 */ 12 13 /* 14 * Socket operations for use by nfs 15 */ 16 17 #include "types.h" 18 #include "param.h" 19 #include "uio.h" 20 #include "proc.h" 21 #include "signal.h" 22 #include "mount.h" 23 #include "kernel.h" 24 #include "malloc.h" 25 #include "mbuf.h" 26 #include "vnode.h" 27 #include "domain.h" 28 #include "protosw.h" 29 #include "socket.h" 30 #include "socketvar.h" 31 #include "syslog.h" 32 #include "tprintf.h" 33 #include "machine/endian.h" 34 #include "netinet/in.h" 35 #include "netinet/tcp.h" 36 #ifdef ISO 37 #include "netiso/iso.h" 38 #endif 39 #include "ufs/ufs/quota.h" 40 #include "ufs/ufs/ufsmount.h" 41 #include "rpcv2.h" 42 #include "nfsv2.h" 43 #include "nfs.h" 44 #include "xdr_subs.h" 45 #include "nfsm_subs.h" 46 #include "nfsmount.h" 47 #include "nfsnode.h" 48 #include "nfsrtt.h" 49 #include "nqnfs.h" 50 51 #define TRUE 1 52 #define FALSE 0 53 54 int netnetnet = sizeof (struct netaddrhash); 55 /* 56 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 57 * Use the mean and mean deviation of rtt for the appropriate type of rpc 58 * for the frequent rpcs and a default for the others. 59 * The justification for doing "other" this way is that these rpcs 60 * happen so infrequently that timer est. would probably be stale. 61 * Also, since many of these rpcs are 62 * non-idempotent, a conservative timeout is desired. 63 * getattr, lookup - A+2D 64 * read, write - A+4D 65 * other - nm_timeo 66 */ 67 #define NFS_RTO(n, t) \ 68 ((t) == 0 ? (n)->nm_timeo : \ 69 ((t) < 3 ? \ 70 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 71 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 72 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 73 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 74 /* 75 * External data, mostly RPC constants in XDR form 76 */ 77 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 78 rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, 79 rpc_auth_kerb; 80 extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; 81 extern time_t nqnfsstarttime; 82 extern int nonidempotent[NFS_NPROCS]; 83 84 /* 85 * Maps errno values to nfs error numbers. 86 * Use NFSERR_IO as the catch all for ones not specifically defined in 87 * RFC 1094. 88 */ 89 static int nfsrv_errmap[ELAST] = { 90 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 91 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 92 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 93 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 94 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 95 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 96 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 97 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 98 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 99 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 100 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 101 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 102 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 103 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 104 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 105 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 106 NFSERR_IO, 107 }; 108 109 /* 110 * Defines which timer to use for the procnum. 111 * 0 - default 112 * 1 - getattr 113 * 2 - lookup 114 * 3 - read 115 * 4 - write 116 */ 117 static int proct[NFS_NPROCS] = { 118 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 119 }; 120 121 /* 122 * There is a congestion window for outstanding rpcs maintained per mount 123 * point. The cwnd size is adjusted in roughly the way that: 124 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 125 * SIGCOMM '88". ACM, August 1988. 126 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 127 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 128 * of rpcs is in progress. 129 * (The sent count and cwnd are scaled for integer arith.) 130 * Variants of "slow start" were tried and were found to be too much of a 131 * performance hit (ave. rtt 3 times larger), 132 * I suspect due to the large rtt that nfs rpcs have. 133 */ 134 #define NFS_CWNDSCALE 256 135 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) 136 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 137 int nfs_sbwait(); 138 void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock(); 139 void nfs_rcvunlock(), nqnfs_serverd(); 140 struct mbuf *nfsm_rpchead(); 141 int nfsrtton = 0; 142 struct nfsrtt nfsrtt; 143 struct nfsd nfsd_head; 144 145 int nfsrv_null(), 146 nfsrv_getattr(), 147 nfsrv_setattr(), 148 nfsrv_lookup(), 149 nfsrv_readlink(), 150 nfsrv_read(), 151 nfsrv_write(), 152 nfsrv_create(), 153 nfsrv_remove(), 154 nfsrv_rename(), 155 nfsrv_link(), 156 nfsrv_symlink(), 157 nfsrv_mkdir(), 158 nfsrv_rmdir(), 159 nfsrv_readdir(), 160 nfsrv_statfs(), 161 nfsrv_noop(), 162 nqnfsrv_readdirlook(), 163 nqnfsrv_getlease(), 164 nqnfsrv_vacated(); 165 166 int (*nfsrv_procs[NFS_NPROCS])() = { 167 nfsrv_null, 168 nfsrv_getattr, 169 nfsrv_setattr, 170 nfsrv_noop, 171 nfsrv_lookup, 172 nfsrv_readlink, 173 nfsrv_read, 174 nfsrv_noop, 175 nfsrv_write, 176 nfsrv_create, 177 nfsrv_remove, 178 nfsrv_rename, 179 nfsrv_link, 180 nfsrv_symlink, 181 nfsrv_mkdir, 182 nfsrv_rmdir, 183 nfsrv_readdir, 184 nfsrv_statfs, 185 nqnfsrv_readdirlook, 186 nqnfsrv_getlease, 187 nqnfsrv_vacated, 188 }; 189 190 struct nfsreq nfsreqh; 191 192 /* 193 * Initialize sockets and congestion for a new NFS connection. 194 * We do not free the sockaddr if error. 195 */ 196 nfs_connect(nmp, rep) 197 register struct nfsmount *nmp; 198 struct nfsreq *rep; 199 { 200 register struct socket *so; 201 int s, error, rcvreserve, sndreserve; 202 struct mbuf *m; 203 204 nmp->nm_so = (struct socket *)0; 205 if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 206 &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 207 goto bad; 208 so = nmp->nm_so; 209 nmp->nm_soflags = so->so_proto->pr_flags; 210 211 /* 212 * Protocols that do not require connections may be optionally left 213 * unconnected for servers that reply from a port other than NFS_PORT. 214 */ 215 if (nmp->nm_flag & NFSMNT_NOCONN) { 216 if (nmp->nm_soflags & PR_CONNREQUIRED) { 217 error = ENOTCONN; 218 goto bad; 219 } 220 } else { 221 if (error = soconnect(so, nmp->nm_nam)) 222 goto bad; 223 224 /* 225 * Wait for the connection to complete. Cribbed from the 226 * connect system call but with the wait timing out so 227 * that interruptible mounts don't hang here for a long time. 228 */ 229 s = splnet(); 230 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 231 (void) tsleep((caddr_t)&so->so_timeo, PSOCK, 232 "nfscon", 2 * hz); 233 if ((so->so_state & SS_ISCONNECTING) && 234 so->so_error == 0 && rep && 235 (error = nfs_sigintr(nmp, rep, rep->r_procp))) { 236 so->so_state &= ~SS_ISCONNECTING; 237 splx(s); 238 goto bad; 239 } 240 } 241 if (so->so_error) { 242 error = so->so_error; 243 so->so_error = 0; 244 splx(s); 245 goto bad; 246 } 247 splx(s); 248 } 249 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 250 so->so_rcv.sb_timeo = (5 * hz); 251 so->so_snd.sb_timeo = (5 * hz); 252 } else { 253 so->so_rcv.sb_timeo = 0; 254 so->so_snd.sb_timeo = 0; 255 } 256 if (nmp->nm_sotype == SOCK_DGRAM) { 257 sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; 258 rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; 259 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 260 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 261 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; 262 } else { 263 if (nmp->nm_sotype != SOCK_STREAM) 264 panic("nfscon sotype"); 265 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 266 MGET(m, M_WAIT, MT_SOOPTS); 267 *mtod(m, int *) = 1; 268 m->m_len = sizeof(int); 269 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 270 } 271 if (so->so_proto->pr_protocol == IPPROTO_TCP) { 272 MGET(m, M_WAIT, MT_SOOPTS); 273 *mtod(m, int *) = 1; 274 m->m_len = sizeof(int); 275 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 276 } 277 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) 278 * 2; 279 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) 280 * 2; 281 } 282 if (error = soreserve(so, sndreserve, rcvreserve)) 283 goto bad; 284 so->so_rcv.sb_flags |= SB_NOINTR; 285 so->so_snd.sb_flags |= SB_NOINTR; 286 287 /* Initialize other non-zero congestion variables */ 288 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 289 nmp->nm_srtt[4] = (NFS_TIMEO << 3); 290 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 291 nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; 292 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 293 nmp->nm_sent = 0; 294 nmp->nm_timeouts = 0; 295 return (0); 296 297 bad: 298 nfs_disconnect(nmp); 299 return (error); 300 } 301 302 /* 303 * Reconnect routine: 304 * Called when a connection is broken on a reliable protocol. 305 * - clean up the old socket 306 * - nfs_connect() again 307 * - set R_MUSTRESEND for all outstanding requests on mount point 308 * If this fails the mount point is DEAD! 309 * nb: Must be called with the nfs_sndlock() set on the mount point. 310 */ 311 nfs_reconnect(rep) 312 register struct nfsreq *rep; 313 { 314 register struct nfsreq *rp; 315 register struct nfsmount *nmp = rep->r_nmp; 316 int error; 317 318 nfs_disconnect(nmp); 319 while (error = nfs_connect(nmp, rep)) { 320 if (error == EINTR || error == ERESTART) 321 return (EINTR); 322 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 323 } 324 325 /* 326 * Loop through outstanding request list and fix up all requests 327 * on old socket. 328 */ 329 rp = nfsreqh.r_next; 330 while (rp != &nfsreqh) { 331 if (rp->r_nmp == nmp) 332 rp->r_flags |= R_MUSTRESEND; 333 rp = rp->r_next; 334 } 335 return (0); 336 } 337 338 /* 339 * NFS disconnect. Clean up and unlink. 340 */ 341 void 342 nfs_disconnect(nmp) 343 register struct nfsmount *nmp; 344 { 345 register struct socket *so; 346 347 if (nmp->nm_so) { 348 so = nmp->nm_so; 349 nmp->nm_so = (struct socket *)0; 350 soshutdown(so, 2); 351 soclose(so); 352 } 353 } 354 355 /* 356 * This is the nfs send routine. For connection based socket types, it 357 * must be called with an nfs_sndlock() on the socket. 358 * "rep == NULL" indicates that it has been called from a server. 359 * For the client side: 360 * - return EINTR if the RPC is terminated, 0 otherwise 361 * - set R_MUSTRESEND if the send fails for any reason 362 * - do any cleanup required by recoverable socket errors (???) 363 * For the server side: 364 * - return EINTR or ERESTART if interrupted by a signal 365 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 366 * - do any cleanup required by recoverable socket errors (???) 367 */ 368 nfs_send(so, nam, top, rep) 369 register struct socket *so; 370 struct mbuf *nam; 371 register struct mbuf *top; 372 struct nfsreq *rep; 373 { 374 struct mbuf *sendnam; 375 int error, soflags, flags; 376 377 if (rep) { 378 if (rep->r_flags & R_SOFTTERM) { 379 m_freem(top); 380 return (EINTR); 381 } 382 if ((so = rep->r_nmp->nm_so) == NULL) { 383 rep->r_flags |= R_MUSTRESEND; 384 m_freem(top); 385 return (0); 386 } 387 rep->r_flags &= ~R_MUSTRESEND; 388 soflags = rep->r_nmp->nm_soflags; 389 } else 390 soflags = so->so_proto->pr_flags; 391 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 392 sendnam = (struct mbuf *)0; 393 else 394 sendnam = nam; 395 if (so->so_type == SOCK_SEQPACKET) 396 flags = MSG_EOR; 397 else 398 flags = 0; 399 400 error = sosend(so, sendnam, (struct uio *)0, top, 401 (struct mbuf *)0, flags); 402 if (error) { 403 if (rep) { 404 log(LOG_INFO, "nfs send error %d for server %s\n",error, 405 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 406 /* 407 * Deal with errors for the client side. 408 */ 409 if (rep->r_flags & R_SOFTTERM) 410 error = EINTR; 411 else 412 rep->r_flags |= R_MUSTRESEND; 413 } else 414 log(LOG_INFO, "nfsd send error %d\n", error); 415 416 /* 417 * Handle any recoverable (soft) socket errors here. (???) 418 */ 419 if (error != EINTR && error != ERESTART && 420 error != EWOULDBLOCK && error != EPIPE) 421 error = 0; 422 } 423 return (error); 424 } 425 426 /* 427 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 428 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 429 * Mark and consolidate the data into a new mbuf list. 430 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 431 * small mbufs. 432 * For SOCK_STREAM we must be very careful to read an entire record once 433 * we have read any of it, even if the system call has been interrupted. 434 */ 435 nfs_receive(rep, aname, mp) 436 register struct nfsreq *rep; 437 struct mbuf **aname; 438 struct mbuf **mp; 439 { 440 register struct socket *so; 441 struct uio auio; 442 struct iovec aio; 443 register struct mbuf *m; 444 struct mbuf *control; 445 u_long len; 446 struct mbuf **getnam; 447 int error, sotype, rcvflg; 448 struct proc *p = curproc; /* XXX */ 449 450 /* 451 * Set up arguments for soreceive() 452 */ 453 *mp = (struct mbuf *)0; 454 *aname = (struct mbuf *)0; 455 sotype = rep->r_nmp->nm_sotype; 456 457 /* 458 * For reliable protocols, lock against other senders/receivers 459 * in case a reconnect is necessary. 460 * For SOCK_STREAM, first get the Record Mark to find out how much 461 * more there is to get. 462 * We must lock the socket against other receivers 463 * until we have an entire rpc request/reply. 464 */ 465 if (sotype != SOCK_DGRAM) { 466 if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) 467 return (error); 468 tryagain: 469 /* 470 * Check for fatal errors and resending request. 471 */ 472 /* 473 * Ugh: If a reconnect attempt just happened, nm_so 474 * would have changed. NULL indicates a failed 475 * attempt that has essentially shut down this 476 * mount point. 477 */ 478 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 479 nfs_sndunlock(&rep->r_nmp->nm_flag); 480 return (EINTR); 481 } 482 if ((so = rep->r_nmp->nm_so) == NULL) { 483 if (error = nfs_reconnect(rep)) { 484 nfs_sndunlock(&rep->r_nmp->nm_flag); 485 return (error); 486 } 487 goto tryagain; 488 } 489 while (rep->r_flags & R_MUSTRESEND) { 490 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 491 nfsstats.rpcretries++; 492 if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { 493 if (error == EINTR || error == ERESTART || 494 (error = nfs_reconnect(rep))) { 495 nfs_sndunlock(&rep->r_nmp->nm_flag); 496 return (error); 497 } 498 goto tryagain; 499 } 500 } 501 nfs_sndunlock(&rep->r_nmp->nm_flag); 502 if (sotype == SOCK_STREAM) { 503 aio.iov_base = (caddr_t) &len; 504 aio.iov_len = sizeof(u_long); 505 auio.uio_iov = &aio; 506 auio.uio_iovcnt = 1; 507 auio.uio_segflg = UIO_SYSSPACE; 508 auio.uio_rw = UIO_READ; 509 auio.uio_offset = 0; 510 auio.uio_resid = sizeof(u_long); 511 auio.uio_procp = p; 512 do { 513 rcvflg = MSG_WAITALL; 514 error = soreceive(so, (struct mbuf **)0, &auio, 515 (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 516 if (error == EWOULDBLOCK && rep) { 517 if (rep->r_flags & R_SOFTTERM) 518 return (EINTR); 519 } 520 } while (error == EWOULDBLOCK); 521 if (!error && auio.uio_resid > 0) { 522 log(LOG_INFO, 523 "short receive (%d/%d) from nfs server %s\n", 524 sizeof(u_long) - auio.uio_resid, 525 sizeof(u_long), 526 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 527 error = EPIPE; 528 } 529 if (error) 530 goto errout; 531 len = ntohl(len) & ~0x80000000; 532 /* 533 * This is SERIOUS! We are out of sync with the sender 534 * and forcing a disconnect/reconnect is all I can do. 535 */ 536 if (len > NFS_MAXPACKET) { 537 log(LOG_ERR, "%s (%d) from nfs server %s\n", 538 "impossible packet length", 539 len, 540 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 541 error = EFBIG; 542 goto errout; 543 } 544 auio.uio_resid = len; 545 do { 546 rcvflg = MSG_WAITALL; 547 error = soreceive(so, (struct mbuf **)0, 548 &auio, mp, (struct mbuf **)0, &rcvflg); 549 } while (error == EWOULDBLOCK || error == EINTR || 550 error == ERESTART); 551 if (!error && auio.uio_resid > 0) { 552 log(LOG_INFO, 553 "short receive (%d/%d) from nfs server %s\n", 554 len - auio.uio_resid, len, 555 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 556 error = EPIPE; 557 } 558 } else { 559 /* 560 * NB: Since uio_resid is big, MSG_WAITALL is ignored 561 * and soreceive() will return when it has either a 562 * control msg or a data msg. 563 * We have no use for control msg., but must grab them 564 * and then throw them away so we know what is going 565 * on. 566 */ 567 auio.uio_resid = len = 100000000; /* Anything Big */ 568 auio.uio_procp = p; 569 do { 570 rcvflg = 0; 571 error = soreceive(so, (struct mbuf **)0, 572 &auio, mp, &control, &rcvflg); 573 if (control) 574 m_freem(control); 575 if (error == EWOULDBLOCK && rep) { 576 if (rep->r_flags & R_SOFTTERM) 577 return (EINTR); 578 } 579 } while (error == EWOULDBLOCK || 580 (!error && *mp == NULL && control)); 581 if ((rcvflg & MSG_EOR) == 0) 582 printf("Egad!!\n"); 583 if (!error && *mp == NULL) 584 error = EPIPE; 585 len -= auio.uio_resid; 586 } 587 errout: 588 if (error && error != EINTR && error != ERESTART) { 589 m_freem(*mp); 590 *mp = (struct mbuf *)0; 591 if (error != EPIPE) 592 log(LOG_INFO, 593 "receive error %d from nfs server %s\n", 594 error, 595 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 596 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); 597 if (!error) 598 error = nfs_reconnect(rep); 599 if (!error) 600 goto tryagain; 601 } 602 } else { 603 if ((so = rep->r_nmp->nm_so) == NULL) 604 return (EACCES); 605 if (so->so_state & SS_ISCONNECTED) 606 getnam = (struct mbuf **)0; 607 else 608 getnam = aname; 609 auio.uio_resid = len = 1000000; 610 auio.uio_procp = p; 611 do { 612 rcvflg = 0; 613 error = soreceive(so, getnam, &auio, mp, 614 (struct mbuf **)0, &rcvflg); 615 if (error == EWOULDBLOCK && 616 (rep->r_flags & R_SOFTTERM)) 617 return (EINTR); 618 } while (error == EWOULDBLOCK); 619 len -= auio.uio_resid; 620 } 621 if (error) { 622 m_freem(*mp); 623 *mp = (struct mbuf *)0; 624 } 625 /* 626 * Search for any mbufs that are not a multiple of 4 bytes long 627 * or with m_data not longword aligned. 628 * These could cause pointer alignment problems, so copy them to 629 * well aligned mbufs. 630 */ 631 nfs_realign(*mp, 5 * NFSX_UNSIGNED); 632 return (error); 633 } 634 635 /* 636 * Implement receipt of reply on a socket. 637 * We must search through the list of received datagrams matching them 638 * with outstanding requests using the xid, until ours is found. 639 */ 640 /* ARGSUSED */ 641 nfs_reply(myrep) 642 struct nfsreq *myrep; 643 { 644 register struct nfsreq *rep; 645 register struct nfsmount *nmp = myrep->r_nmp; 646 register long t1; 647 struct mbuf *mrep, *nam, *md; 648 u_long rxid, *tl; 649 caddr_t dpos, cp2; 650 int error; 651 652 /* 653 * Loop around until we get our own reply 654 */ 655 for (;;) { 656 /* 657 * Lock against other receivers so that I don't get stuck in 658 * sbwait() after someone else has received my reply for me. 659 * Also necessary for connection based protocols to avoid 660 * race conditions during a reconnect. 661 */ 662 if (error = nfs_rcvlock(myrep)) 663 return (error); 664 /* Already received, bye bye */ 665 if (myrep->r_mrep != NULL) { 666 nfs_rcvunlock(&nmp->nm_flag); 667 return (0); 668 } 669 /* 670 * Get the next Rpc reply off the socket 671 */ 672 error = nfs_receive(myrep, &nam, &mrep); 673 nfs_rcvunlock(&nmp->nm_flag); 674 if (error) printf("rcv err=%d\n",error); 675 if (error) { 676 677 /* 678 * Ignore routing errors on connectionless protocols?? 679 */ 680 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 681 nmp->nm_so->so_error = 0; 682 continue; 683 } 684 return (error); 685 } 686 if (nam) 687 m_freem(nam); 688 689 /* 690 * Get the xid and check that it is an rpc reply 691 */ 692 md = mrep; 693 dpos = mtod(md, caddr_t); 694 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 695 rxid = *tl++; 696 if (*tl != rpc_reply) { 697 if (nmp->nm_flag & NFSMNT_NQNFS) { 698 if (nqnfs_callback(nmp, mrep, md, dpos)) 699 nfsstats.rpcinvalid++; 700 } else { 701 nfsstats.rpcinvalid++; 702 m_freem(mrep); 703 } 704 nfsmout: 705 continue; 706 } 707 708 /* 709 * Loop through the request list to match up the reply 710 * Iff no match, just drop the datagram 711 */ 712 rep = nfsreqh.r_next; 713 while (rep != &nfsreqh) { 714 if (rep->r_mrep == NULL && rxid == rep->r_xid) { 715 /* Found it.. */ 716 rep->r_mrep = mrep; 717 rep->r_md = md; 718 rep->r_dpos = dpos; 719 if (nfsrtton) { 720 struct rttl *rt; 721 722 rt = &nfsrtt.rttl[nfsrtt.pos]; 723 rt->proc = rep->r_procnum; 724 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 725 rt->sent = nmp->nm_sent; 726 rt->cwnd = nmp->nm_cwnd; 727 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 728 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 729 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; 730 rt->tstamp = time; 731 if (rep->r_flags & R_TIMING) 732 rt->rtt = rep->r_rtt; 733 else 734 rt->rtt = 1000000; 735 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 736 } 737 /* 738 * Update congestion window. 739 * Do the additive increase of 740 * one rpc/rtt. 741 */ 742 if (nmp->nm_cwnd <= nmp->nm_sent) { 743 nmp->nm_cwnd += 744 (NFS_CWNDSCALE * NFS_CWNDSCALE + 745 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 746 if (nmp->nm_cwnd > NFS_MAXCWND) 747 nmp->nm_cwnd = NFS_MAXCWND; 748 } 749 nmp->nm_sent -= NFS_CWNDSCALE; 750 /* 751 * Update rtt using a gain of 0.125 on the mean 752 * and a gain of 0.25 on the deviation. 753 */ 754 if (rep->r_flags & R_TIMING) { 755 /* 756 * Since the timer resolution of 757 * NFS_HZ is so course, it can often 758 * result in r_rtt == 0. Since 759 * r_rtt == N means that the actual 760 * rtt is between N+dt and N+2-dt ticks, 761 * add 1. 762 */ 763 t1 = rep->r_rtt + 1; 764 t1 -= (NFS_SRTT(rep) >> 3); 765 NFS_SRTT(rep) += t1; 766 if (t1 < 0) 767 t1 = -t1; 768 t1 -= (NFS_SDRTT(rep) >> 2); 769 NFS_SDRTT(rep) += t1; 770 } 771 nmp->nm_timeouts = 0; 772 break; 773 } 774 rep = rep->r_next; 775 } 776 /* 777 * If not matched to a request, drop it. 778 * If it's mine, get out. 779 */ 780 if (rep == &nfsreqh) { 781 nfsstats.rpcunexpected++; 782 m_freem(mrep); 783 } else if (rep == myrep) 784 return (0); 785 } 786 } 787 788 /* 789 * nfs_request - goes something like this 790 * - fill in request struct 791 * - links it into list 792 * - calls nfs_send() for first transmit 793 * - calls nfs_receive() to get reply 794 * - break down rpc header and return with nfs reply pointed to 795 * by mrep or error 796 * nb: always frees up mreq mbuf list 797 */ 798 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) 799 struct vnode *vp; 800 struct mbuf *mrest; 801 int procnum; 802 struct proc *procp; 803 struct ucred *cred; 804 struct mbuf **mrp; 805 struct mbuf **mdp; 806 caddr_t *dposp; 807 { 808 register struct mbuf *m, *mrep; 809 register struct nfsreq *rep; 810 register u_long *tl; 811 register int i; 812 struct nfsmount *nmp; 813 struct mbuf *md, *mheadend; 814 struct nfsreq *reph; 815 struct nfsnode *tp, *np; 816 time_t reqtime, waituntil; 817 caddr_t dpos, cp2; 818 int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; 819 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; 820 u_long xid; 821 char *auth_str; 822 823 nmp = VFSTONFS(vp->v_mount); 824 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 825 rep->r_nmp = nmp; 826 rep->r_vp = vp; 827 rep->r_procp = procp; 828 rep->r_procnum = procnum; 829 i = 0; 830 m = mrest; 831 while (m) { 832 i += m->m_len; 833 m = m->m_next; 834 } 835 mrest_len = i; 836 837 /* 838 * Get the RPC header with authorization. 839 */ 840 kerbauth: 841 auth_str = (char *)0; 842 if (nmp->nm_flag & NFSMNT_KERB) { 843 if (failed_auth) { 844 error = nfs_getauth(nmp, rep, cred, &auth_type, 845 &auth_str, &auth_len); 846 if (error) { 847 free((caddr_t)rep, M_NFSREQ); 848 m_freem(mrest); 849 return (error); 850 } 851 } else { 852 auth_type = RPCAUTH_UNIX; 853 auth_len = 5 * NFSX_UNSIGNED; 854 } 855 } else { 856 auth_type = RPCAUTH_UNIX; 857 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 858 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 859 5 * NFSX_UNSIGNED; 860 } 861 m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, 862 auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); 863 if (auth_str) 864 free(auth_str, M_TEMP); 865 866 /* 867 * For stream protocols, insert a Sun RPC Record Mark. 868 */ 869 if (nmp->nm_sotype == SOCK_STREAM) { 870 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 871 *mtod(m, u_long *) = htonl(0x80000000 | 872 (m->m_pkthdr.len - NFSX_UNSIGNED)); 873 } 874 rep->r_mreq = m; 875 rep->r_xid = xid; 876 tryagain: 877 if (nmp->nm_flag & NFSMNT_SOFT) 878 rep->r_retry = nmp->nm_retry; 879 else 880 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 881 rep->r_rtt = rep->r_rexmit = 0; 882 if (proct[procnum] > 0) 883 rep->r_flags = R_TIMING; 884 else 885 rep->r_flags = 0; 886 rep->r_mrep = NULL; 887 888 /* 889 * Do the client side RPC. 890 */ 891 nfsstats.rpcrequests++; 892 /* 893 * Chain request into list of outstanding requests. Be sure 894 * to put it LAST so timer finds oldest requests first. 895 */ 896 s = splsoftclock(); 897 reph = &nfsreqh; 898 reph->r_prev->r_next = rep; 899 rep->r_prev = reph->r_prev; 900 reph->r_prev = rep; 901 rep->r_next = reph; 902 903 /* Get send time for nqnfs */ 904 reqtime = time.tv_sec; 905 906 /* 907 * If backing off another request or avoiding congestion, don't 908 * send this one now but let timer do it. If not timing a request, 909 * do it now. 910 */ 911 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 912 (nmp->nm_flag & NFSMNT_DUMBTIMR) || 913 nmp->nm_sent < nmp->nm_cwnd)) { 914 splx(s); 915 if (nmp->nm_soflags & PR_CONNREQUIRED) 916 error = nfs_sndlock(&nmp->nm_flag, rep); 917 if (!error) { 918 m = m_copym(m, 0, M_COPYALL, M_WAIT); 919 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 920 if (nmp->nm_soflags & PR_CONNREQUIRED) 921 nfs_sndunlock(&nmp->nm_flag); 922 } 923 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 924 nmp->nm_sent += NFS_CWNDSCALE; 925 rep->r_flags |= R_SENT; 926 } 927 } else { 928 splx(s); 929 rep->r_rtt = -1; 930 } 931 932 /* 933 * Wait for the reply from our send or the timer's. 934 */ 935 if (!error) 936 error = nfs_reply(rep); 937 938 /* 939 * RPC done, unlink the request. 940 */ 941 s = splsoftclock(); 942 rep->r_prev->r_next = rep->r_next; 943 rep->r_next->r_prev = rep->r_prev; 944 splx(s); 945 946 /* 947 * If there was a successful reply and a tprintf msg. 948 * tprintf a response. 949 */ 950 if (!error && (rep->r_flags & R_TPRINTFMSG)) 951 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 952 "is alive again"); 953 mrep = rep->r_mrep; 954 md = rep->r_md; 955 dpos = rep->r_dpos; 956 if (error) { 957 m_freem(rep->r_mreq); 958 free((caddr_t)rep, M_NFSREQ); 959 return (error); 960 } 961 962 /* 963 * break down the rpc header and check if ok 964 */ 965 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 966 if (*tl++ == rpc_msgdenied) { 967 if (*tl == rpc_mismatch) 968 error = EOPNOTSUPP; 969 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 970 if (*tl == rpc_rejectedcred && failed_auth == 0) { 971 failed_auth++; 972 mheadend->m_next = (struct mbuf *)0; 973 m_freem(mrep); 974 m_freem(rep->r_mreq); 975 goto kerbauth; 976 } else 977 error = EAUTH; 978 } else 979 error = EACCES; 980 m_freem(mrep); 981 m_freem(rep->r_mreq); 982 free((caddr_t)rep, M_NFSREQ); 983 return (error); 984 } 985 986 /* 987 * skip over the auth_verf, someday we may want to cache auth_short's 988 * for nfs_reqhead(), but for now just dump it 989 */ 990 if (*++tl != 0) { 991 i = nfsm_rndup(fxdr_unsigned(long, *tl)); 992 nfsm_adv(i); 993 } 994 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 995 /* 0 == ok */ 996 if (*tl == 0) { 997 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 998 if (*tl != 0) { 999 error = fxdr_unsigned(int, *tl); 1000 m_freem(mrep); 1001 if ((nmp->nm_flag & NFSMNT_NQNFS) && 1002 error == NQNFS_TRYLATER) { 1003 error = 0; 1004 waituntil = time.tv_sec + trylater_delay; 1005 while (time.tv_sec < waituntil) 1006 (void) tsleep((caddr_t)&lbolt, 1007 PSOCK, "nqnfstry", 0); 1008 trylater_delay *= nfs_backoff[trylater_cnt]; 1009 if (trylater_cnt < 7) 1010 trylater_cnt++; 1011 goto tryagain; 1012 } 1013 m_freem(rep->r_mreq); 1014 free((caddr_t)rep, M_NFSREQ); 1015 return (error); 1016 } 1017 1018 /* 1019 * For nqnfs, get any lease in reply 1020 */ 1021 if (nmp->nm_flag & NFSMNT_NQNFS) { 1022 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 1023 if (*tl) { 1024 np = VTONFS(vp); 1025 nqlflag = fxdr_unsigned(int, *tl); 1026 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); 1027 cachable = fxdr_unsigned(int, *tl++); 1028 reqtime += fxdr_unsigned(int, *tl++); 1029 if (reqtime > time.tv_sec) { 1030 if (np->n_tnext) { 1031 if (np->n_tnext == (struct nfsnode *)nmp) 1032 nmp->nm_tprev = np->n_tprev; 1033 else 1034 np->n_tnext->n_tprev = np->n_tprev; 1035 if (np->n_tprev == (struct nfsnode *)nmp) 1036 nmp->nm_tnext = np->n_tnext; 1037 else 1038 np->n_tprev->n_tnext = np->n_tnext; 1039 if (nqlflag == NQL_WRITE) 1040 np->n_flag |= NQNFSWRITE; 1041 } else if (nqlflag == NQL_READ) 1042 np->n_flag &= ~NQNFSWRITE; 1043 else 1044 np->n_flag |= NQNFSWRITE; 1045 if (cachable) 1046 np->n_flag &= ~NQNFSNONCACHE; 1047 else 1048 np->n_flag |= NQNFSNONCACHE; 1049 np->n_expiry = reqtime; 1050 fxdr_hyper(tl, &np->n_lrev); 1051 tp = nmp->nm_tprev; 1052 while (tp != (struct nfsnode *)nmp && 1053 tp->n_expiry > np->n_expiry) 1054 tp = tp->n_tprev; 1055 if (tp == (struct nfsnode *)nmp) { 1056 np->n_tnext = nmp->nm_tnext; 1057 nmp->nm_tnext = np; 1058 } else { 1059 np->n_tnext = tp->n_tnext; 1060 tp->n_tnext = np; 1061 } 1062 np->n_tprev = tp; 1063 if (np->n_tnext == (struct nfsnode *)nmp) 1064 nmp->nm_tprev = np; 1065 else 1066 np->n_tnext->n_tprev = np; 1067 } 1068 } 1069 } 1070 *mrp = mrep; 1071 *mdp = md; 1072 *dposp = dpos; 1073 m_freem(rep->r_mreq); 1074 FREE((caddr_t)rep, M_NFSREQ); 1075 return (0); 1076 } 1077 m_freem(mrep); 1078 m_freem(rep->r_mreq); 1079 free((caddr_t)rep, M_NFSREQ); 1080 error = EPROTONOSUPPORT; 1081 nfsmout: 1082 return (error); 1083 } 1084 1085 /* 1086 * Generate the rpc reply header 1087 * siz arg. is used to decide if adding a cluster is worthwhile 1088 */ 1089 nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) 1090 int siz; 1091 struct nfsd *nd; 1092 int err; 1093 int cache; 1094 u_quad_t *frev; 1095 struct mbuf **mrq; 1096 struct mbuf **mbp; 1097 caddr_t *bposp; 1098 { 1099 register u_long *tl; 1100 register struct mbuf *mreq; 1101 caddr_t bpos; 1102 struct mbuf *mb, *mb2; 1103 1104 MGETHDR(mreq, M_WAIT, MT_DATA); 1105 mb = mreq; 1106 /* 1107 * If this is a big reply, use a cluster else 1108 * try and leave leading space for the lower level headers. 1109 */ 1110 siz += RPC_REPLYSIZ; 1111 if (siz >= MINCLSIZE) { 1112 MCLGET(mreq, M_WAIT); 1113 } else 1114 mreq->m_data += max_hdr; 1115 tl = mtod(mreq, u_long *); 1116 mreq->m_len = 6*NFSX_UNSIGNED; 1117 bpos = ((caddr_t)tl)+mreq->m_len; 1118 *tl++ = nd->nd_retxid; 1119 *tl++ = rpc_reply; 1120 if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { 1121 *tl++ = rpc_msgdenied; 1122 if (err == NQNFS_AUTHERR) { 1123 *tl++ = rpc_autherr; 1124 *tl = rpc_rejectedcred; 1125 mreq->m_len -= NFSX_UNSIGNED; 1126 bpos -= NFSX_UNSIGNED; 1127 } else { 1128 *tl++ = rpc_mismatch; 1129 *tl++ = txdr_unsigned(2); 1130 *tl = txdr_unsigned(2); 1131 } 1132 } else { 1133 *tl++ = rpc_msgaccepted; 1134 *tl++ = 0; 1135 *tl++ = 0; 1136 switch (err) { 1137 case EPROGUNAVAIL: 1138 *tl = txdr_unsigned(RPC_PROGUNAVAIL); 1139 break; 1140 case EPROGMISMATCH: 1141 *tl = txdr_unsigned(RPC_PROGMISMATCH); 1142 nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 1143 *tl++ = txdr_unsigned(2); 1144 *tl = txdr_unsigned(2); /* someday 3 */ 1145 break; 1146 case EPROCUNAVAIL: 1147 *tl = txdr_unsigned(RPC_PROCUNAVAIL); 1148 break; 1149 default: 1150 *tl = 0; 1151 if (err != VNOVAL) { 1152 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1153 if (err) 1154 *tl = txdr_unsigned(nfsrv_errmap[err - 1]); 1155 else 1156 *tl = 0; 1157 } 1158 break; 1159 }; 1160 } 1161 1162 /* 1163 * For nqnfs, piggyback lease as requested. 1164 */ 1165 if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { 1166 if (nd->nd_nqlflag) { 1167 nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); 1168 *tl++ = txdr_unsigned(nd->nd_nqlflag); 1169 *tl++ = txdr_unsigned(cache); 1170 *tl++ = txdr_unsigned(nd->nd_duration); 1171 txdr_hyper(frev, tl); 1172 } else { 1173 if (nd->nd_nqlflag != 0) 1174 panic("nqreph"); 1175 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1176 *tl = 0; 1177 } 1178 } 1179 *mrq = mreq; 1180 *mbp = mb; 1181 *bposp = bpos; 1182 if (err != 0 && err != VNOVAL) 1183 nfsstats.srvrpc_errs++; 1184 return (0); 1185 } 1186 1187 /* 1188 * Nfs timer routine 1189 * Scan the nfsreq list and retranmit any requests that have timed out 1190 * To avoid retransmission attempts on STREAM sockets (in the future) make 1191 * sure to set the r_retry field to 0 (implies nm_retry == 0). 1192 */ 1193 nfs_timer() 1194 { 1195 register struct nfsreq *rep; 1196 register struct mbuf *m; 1197 register struct socket *so; 1198 register struct nfsmount *nmp; 1199 register int timeo; 1200 static long lasttime = 0; 1201 int s, error; 1202 1203 s = splnet(); 1204 for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 1205 nmp = rep->r_nmp; 1206 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 1207 continue; 1208 if (nfs_sigintr(nmp, rep, rep->r_procp)) { 1209 rep->r_flags |= R_SOFTTERM; 1210 continue; 1211 } 1212 if (rep->r_rtt >= 0) { 1213 rep->r_rtt++; 1214 if (nmp->nm_flag & NFSMNT_DUMBTIMR) 1215 timeo = nmp->nm_timeo; 1216 else 1217 timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 1218 if (nmp->nm_timeouts > 0) 1219 timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 1220 if (rep->r_rtt <= timeo) 1221 continue; 1222 if (nmp->nm_timeouts < 8) 1223 nmp->nm_timeouts++; 1224 } 1225 /* 1226 * Check for server not responding 1227 */ 1228 if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1229 rep->r_rexmit > nmp->nm_deadthresh) { 1230 nfs_msg(rep->r_procp, 1231 nmp->nm_mountp->mnt_stat.f_mntfromname, 1232 "not responding"); 1233 rep->r_flags |= R_TPRINTFMSG; 1234 } 1235 if (rep->r_rexmit >= rep->r_retry) { /* too many */ 1236 nfsstats.rpctimeouts++; 1237 rep->r_flags |= R_SOFTTERM; 1238 continue; 1239 } 1240 if (nmp->nm_sotype != SOCK_DGRAM) { 1241 if (++rep->r_rexmit > NFS_MAXREXMIT) 1242 rep->r_rexmit = NFS_MAXREXMIT; 1243 continue; 1244 } 1245 if ((so = nmp->nm_so) == NULL) 1246 continue; 1247 1248 /* 1249 * If there is enough space and the window allows.. 1250 * Resend it 1251 * Set r_rtt to -1 in case we fail to send it now. 1252 */ 1253 rep->r_rtt = -1; 1254 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 1255 ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 1256 (rep->r_flags & R_SENT) || 1257 nmp->nm_sent < nmp->nm_cwnd) && 1258 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 1259 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 1260 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1261 (struct mbuf *)0, (struct mbuf *)0); 1262 else 1263 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1264 nmp->nm_nam, (struct mbuf *)0); 1265 if (error) { 1266 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 1267 so->so_error = 0; 1268 } else { 1269 /* 1270 * Iff first send, start timing 1271 * else turn timing off, backoff timer 1272 * and divide congestion window by 2. 1273 */ 1274 if (rep->r_flags & R_SENT) { 1275 rep->r_flags &= ~R_TIMING; 1276 if (++rep->r_rexmit > NFS_MAXREXMIT) 1277 rep->r_rexmit = NFS_MAXREXMIT; 1278 nmp->nm_cwnd >>= 1; 1279 if (nmp->nm_cwnd < NFS_CWNDSCALE) 1280 nmp->nm_cwnd = NFS_CWNDSCALE; 1281 nfsstats.rpcretries++; 1282 } else { 1283 rep->r_flags |= R_SENT; 1284 nmp->nm_sent += NFS_CWNDSCALE; 1285 } 1286 rep->r_rtt = 0; 1287 } 1288 } 1289 } 1290 1291 /* 1292 * Call the nqnfs server timer once a second to handle leases. 1293 */ 1294 if (lasttime != time.tv_sec) { 1295 lasttime = time.tv_sec; 1296 nqnfs_serverd(); 1297 } 1298 splx(s); 1299 timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 1300 } 1301 1302 /* 1303 * Test for a termination condition pending on the process. 1304 * This is used for NFSMNT_INT mounts. 1305 */ 1306 nfs_sigintr(nmp, rep, p) 1307 struct nfsmount *nmp; 1308 struct nfsreq *rep; 1309 register struct proc *p; 1310 { 1311 1312 if (rep && (rep->r_flags & R_SOFTTERM)) 1313 return (EINTR); 1314 if (!(nmp->nm_flag & NFSMNT_INT)) 1315 return (0); 1316 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 1317 NFSINT_SIGMASK)) 1318 return (EINTR); 1319 return (0); 1320 } 1321 1322 /* 1323 * Lock a socket against others. 1324 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 1325 * and also to avoid race conditions between the processes with nfs requests 1326 * in progress when a reconnect is necessary. 1327 */ 1328 nfs_sndlock(flagp, rep) 1329 register int *flagp; 1330 struct nfsreq *rep; 1331 { 1332 struct proc *p; 1333 1334 if (rep) 1335 p = rep->r_procp; 1336 else 1337 p = (struct proc *)0; 1338 while (*flagp & NFSMNT_SNDLOCK) { 1339 if (nfs_sigintr(rep->r_nmp, rep, p)) 1340 return (EINTR); 1341 *flagp |= NFSMNT_WANTSND; 1342 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0); 1343 } 1344 *flagp |= NFSMNT_SNDLOCK; 1345 return (0); 1346 } 1347 1348 /* 1349 * Unlock the stream socket for others. 1350 */ 1351 void 1352 nfs_sndunlock(flagp) 1353 register int *flagp; 1354 { 1355 1356 if ((*flagp & NFSMNT_SNDLOCK) == 0) 1357 panic("nfs sndunlock"); 1358 *flagp &= ~NFSMNT_SNDLOCK; 1359 if (*flagp & NFSMNT_WANTSND) { 1360 *flagp &= ~NFSMNT_WANTSND; 1361 wakeup((caddr_t)flagp); 1362 } 1363 } 1364 1365 nfs_rcvlock(rep) 1366 register struct nfsreq *rep; 1367 { 1368 register int *flagp = &rep->r_nmp->nm_flag; 1369 1370 while (*flagp & NFSMNT_RCVLOCK) { 1371 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) 1372 return (EINTR); 1373 *flagp |= NFSMNT_WANTRCV; 1374 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0); 1375 } 1376 *flagp |= NFSMNT_RCVLOCK; 1377 return (0); 1378 } 1379 1380 /* 1381 * Unlock the stream socket for others. 1382 */ 1383 void 1384 nfs_rcvunlock(flagp) 1385 register int *flagp; 1386 { 1387 1388 if ((*flagp & NFSMNT_RCVLOCK) == 0) 1389 panic("nfs rcvunlock"); 1390 *flagp &= ~NFSMNT_RCVLOCK; 1391 if (*flagp & NFSMNT_WANTRCV) { 1392 *flagp &= ~NFSMNT_WANTRCV; 1393 wakeup((caddr_t)flagp); 1394 } 1395 } 1396 1397 /* 1398 * This function compares two net addresses by family and returns TRUE 1399 * if they are the same host. 1400 * If there is any doubt, return FALSE. 1401 * The AF_INET family is handled as a special case so that address mbufs 1402 * don't need to be saved to store "struct in_addr", which is only 4 bytes. 1403 */ 1404 nfs_netaddr_match(family, haddr, hmask, nam) 1405 int family; 1406 union nethostaddr *haddr; 1407 union nethostaddr *hmask; 1408 struct mbuf *nam; 1409 { 1410 register struct sockaddr_in *inetaddr; 1411 #ifdef ISO 1412 register struct sockaddr_iso *isoaddr1, *isoaddr2; 1413 #endif 1414 1415 1416 switch (family) { 1417 case AF_INET: 1418 inetaddr = mtod(nam, struct sockaddr_in *); 1419 if (inetaddr->sin_family != AF_INET) 1420 return (0); 1421 if (hmask) { 1422 if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) == 1423 (haddr->had_inetaddr & hmask->had_inetaddr)) 1424 return (1); 1425 } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 1426 return (1); 1427 break; 1428 #ifdef ISO 1429 case AF_ISO: 1430 isoaddr1 = mtod(nam, struct sockaddr_iso *); 1431 if (isoaddr1->siso_family != AF_ISO) 1432 return (0); 1433 isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 1434 if (isoaddr1->siso_nlen > 0 && 1435 isoaddr1->siso_nlen == isoaddr2->siso_nlen && 1436 SAME_ISOADDR(isoaddr1, isoaddr2)) 1437 return (1); 1438 break; 1439 #endif /* ISO */ 1440 default: 1441 break; 1442 }; 1443 return (0); 1444 } 1445 1446 /* 1447 * Build hash lists of net addresses and hang them off the mount point. 1448 * Called by ufs_mount() to set up the lists of export addresses. 1449 */ 1450 hang_addrlist(mp, argp) 1451 struct mount *mp; 1452 struct ufs_args *argp; 1453 { 1454 register struct netaddrhash *np, **hnp; 1455 register int i; 1456 struct ufsmount *ump; 1457 struct sockaddr *saddr; 1458 struct mbuf *nam, *msk = (struct mbuf *)0; 1459 union nethostaddr netmsk; 1460 int error; 1461 1462 if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen, 1463 MT_SONAME)) 1464 return (error); 1465 saddr = mtod(nam, struct sockaddr *); 1466 ump = VFSTOUFS(mp); 1467 if (saddr->sa_family == AF_INET && 1468 ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) { 1469 m_freem(nam); 1470 if (mp->mnt_flag & MNT_DEFEXPORTED) 1471 return (EPERM); 1472 np = &ump->um_defexported; 1473 np->neth_exflags = argp->exflags; 1474 np->neth_anon = argp->anon; 1475 np->neth_anon.cr_ref = 1; 1476 mp->mnt_flag |= MNT_DEFEXPORTED; 1477 return (0); 1478 } 1479 if (argp->msklen > 0) { 1480 if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen, 1481 MT_SONAME)) { 1482 m_freem(nam); 1483 return (error); 1484 } 1485 1486 /* 1487 * Scan all the hash lists to check against duplications. 1488 * For the net list, try both masks to catch a subnet 1489 * of another network. 1490 */ 1491 hnp = &ump->um_netaddr[NETMASK_HASH]; 1492 np = *hnp; 1493 if (saddr->sa_family == AF_INET) 1494 netmsk.had_inetaddr = 1495 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr; 1496 else 1497 netmsk.had_nam = msk; 1498 while (np) { 1499 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1500 &np->neth_hmask, nam) || 1501 nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1502 &netmsk, nam)) { 1503 m_freem(nam); 1504 m_freem(msk); 1505 return (EPERM); 1506 } 1507 np = np->neth_next; 1508 } 1509 for (i = 0; i < NETHASHSZ; i++) { 1510 np = ump->um_netaddr[i]; 1511 while (np) { 1512 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1513 &netmsk, nam)) { 1514 m_freem(nam); 1515 m_freem(msk); 1516 return (EPERM); 1517 } 1518 np = np->neth_next; 1519 } 1520 } 1521 } else { 1522 hnp = &ump->um_netaddr[NETADDRHASH(saddr)]; 1523 np = ump->um_netaddr[NETMASK_HASH]; 1524 while (np) { 1525 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1526 &np->neth_hmask, nam)) { 1527 m_freem(nam); 1528 return (EPERM); 1529 } 1530 np = np->neth_next; 1531 } 1532 np = *hnp; 1533 while (np) { 1534 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1535 (union nethostaddr *)0, nam)) { 1536 m_freem(nam); 1537 return (EPERM); 1538 } 1539 np = np->neth_next; 1540 } 1541 } 1542 np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR, 1543 M_WAITOK); 1544 np->neth_family = saddr->sa_family; 1545 if (saddr->sa_family == AF_INET) { 1546 np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr; 1547 m_freem(nam); 1548 if (msk) { 1549 np->neth_inetmask = netmsk.had_inetaddr; 1550 m_freem(msk); 1551 if (np->neth_inetaddr &~ np->neth_inetmask) 1552 return (EPERM); 1553 } else 1554 np->neth_inetmask = 0xffffffff; 1555 } else { 1556 np->neth_nam = nam; 1557 np->neth_msk = msk; 1558 } 1559 np->neth_exflags = argp->exflags; 1560 np->neth_anon = argp->anon; 1561 np->neth_anon.cr_ref = 1; 1562 np->neth_next = *hnp; 1563 *hnp = np; 1564 return (0); 1565 } 1566 1567 /* 1568 * Free the net address hash lists that are hanging off the mount points. 1569 */ 1570 free_addrlist(ump) 1571 struct ufsmount *ump; 1572 { 1573 register struct netaddrhash *np, *onp; 1574 register int i; 1575 1576 for (i = 0; i <= NETHASHSZ; i++) { 1577 np = ump->um_netaddr[i]; 1578 ump->um_netaddr[i] = (struct netaddrhash *)0; 1579 while (np) { 1580 onp = np; 1581 np = np->neth_next; 1582 if (onp->neth_family != AF_INET) { 1583 m_freem(onp->neth_nam); 1584 m_freem(onp->neth_msk); 1585 } 1586 free((caddr_t)onp, M_NETADDR); 1587 } 1588 } 1589 } 1590 1591 /* 1592 * Generate a hash code for an iso host address. Used by NETADDRHASH() for 1593 * iso addresses. 1594 */ 1595 iso_addrhash(saddr) 1596 struct sockaddr *saddr; 1597 { 1598 #ifdef ISO 1599 register struct sockaddr_iso *siso; 1600 register int i, sum; 1601 1602 sum = 0; 1603 for (i = 0; i < siso->siso_nlen; i++) 1604 sum += siso->siso_data[i]; 1605 return (sum & (NETHASHSZ - 1)); 1606 #else 1607 return (0); 1608 #endif /* ISO */ 1609 } 1610 1611 /* 1612 * Check for badly aligned mbuf data areas and 1613 * realign data in an mbuf list by copying the data areas up, as required. 1614 */ 1615 void 1616 nfs_realign(m, hsiz) 1617 register struct mbuf *m; 1618 int hsiz; 1619 { 1620 register struct mbuf *m2; 1621 register int siz, mlen, olen; 1622 register caddr_t tcp, fcp; 1623 struct mbuf *mnew; 1624 1625 while (m) { 1626 /* 1627 * This never happens for UDP, rarely happens for TCP 1628 * but frequently happens for iso transport. 1629 */ 1630 if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { 1631 olen = m->m_len; 1632 fcp = mtod(m, caddr_t); 1633 m->m_flags &= ~M_PKTHDR; 1634 if (m->m_flags & M_EXT) 1635 m->m_data = m->m_ext.ext_buf; 1636 else 1637 m->m_data = m->m_dat; 1638 m->m_len = 0; 1639 tcp = mtod(m, caddr_t); 1640 mnew = m; 1641 m2 = m->m_next; 1642 1643 /* 1644 * If possible, only put the first invariant part 1645 * of the RPC header in the first mbuf. 1646 */ 1647 if (olen <= hsiz) 1648 mlen = hsiz; 1649 else 1650 mlen = M_TRAILINGSPACE(m); 1651 1652 /* 1653 * Loop through the mbuf list consolidating data. 1654 */ 1655 while (m) { 1656 while (olen > 0) { 1657 if (mlen == 0) { 1658 m2->m_flags &= ~M_PKTHDR; 1659 if (m2->m_flags & M_EXT) 1660 m2->m_data = m2->m_ext.ext_buf; 1661 else 1662 m2->m_data = m2->m_dat; 1663 m2->m_len = 0; 1664 mlen = M_TRAILINGSPACE(m2); 1665 tcp = mtod(m2, caddr_t); 1666 mnew = m2; 1667 m2 = m2->m_next; 1668 } 1669 siz = MIN(mlen, olen); 1670 if (tcp != fcp) 1671 bcopy(fcp, tcp, siz); 1672 mnew->m_len += siz; 1673 mlen -= siz; 1674 olen -= siz; 1675 tcp += siz; 1676 fcp += siz; 1677 } 1678 m = m->m_next; 1679 if (m) { 1680 olen = m->m_len; 1681 fcp = mtod(m, caddr_t); 1682 } 1683 } 1684 1685 /* 1686 * Finally, set m_len == 0 for any trailing mbufs that have 1687 * been copied out of. 1688 */ 1689 while (m2) { 1690 m2->m_len = 0; 1691 m2 = m2->m_next; 1692 } 1693 return; 1694 } 1695 m = m->m_next; 1696 } 1697 } 1698 1699 /* 1700 * Socket upcall routine for the nfsd sockets. 1701 * The caddr_t arg is a pointer to the "struct nfssvc_sock". 1702 * Essentially do as much as possible non-blocking, else punt and it will 1703 * be called with M_WAIT from an nfsd. 1704 */ 1705 void 1706 nfsrv_rcv(so, arg, waitflag) 1707 struct socket *so; 1708 caddr_t arg; 1709 int waitflag; 1710 { 1711 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 1712 register struct mbuf *m; 1713 struct mbuf *mp, *nam; 1714 struct uio auio; 1715 int flags, error; 1716 1717 if ((slp->ns_flag & SLP_VALID) == 0) 1718 return; 1719 #ifdef notdef 1720 /* 1721 * Define this to test for nfsds handling this under heavy load. 1722 */ 1723 if (waitflag == M_DONTWAIT) { 1724 slp->ns_flag |= SLP_NEEDQ; goto dorecs; 1725 } 1726 #endif 1727 auio.uio_procp = NULL; 1728 if (so->so_type == SOCK_STREAM) { 1729 /* 1730 * If there are already records on the queue, defer soreceive() 1731 * to an nfsd so that there is feedback to the TCP layer that 1732 * the nfs servers are heavily loaded. 1733 */ 1734 if (slp->ns_rec && waitflag == M_DONTWAIT) { 1735 slp->ns_flag |= SLP_NEEDQ; 1736 goto dorecs; 1737 } 1738 1739 /* 1740 * Do soreceive(). 1741 */ 1742 auio.uio_resid = 1000000000; 1743 flags = MSG_DONTWAIT; 1744 error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); 1745 if (error || mp == (struct mbuf *)0) { 1746 if (error == EWOULDBLOCK) 1747 slp->ns_flag |= SLP_NEEDQ; 1748 else 1749 slp->ns_flag |= SLP_DISCONN; 1750 goto dorecs; 1751 } 1752 m = mp; 1753 if (slp->ns_rawend) { 1754 slp->ns_rawend->m_next = m; 1755 slp->ns_cc += 1000000000 - auio.uio_resid; 1756 } else { 1757 slp->ns_raw = m; 1758 slp->ns_cc = 1000000000 - auio.uio_resid; 1759 } 1760 while (m->m_next) 1761 m = m->m_next; 1762 slp->ns_rawend = m; 1763 1764 /* 1765 * Now try and parse record(s) out of the raw stream data. 1766 */ 1767 if (error = nfsrv_getstream(slp, waitflag)) { 1768 if (error == EPERM) 1769 slp->ns_flag |= SLP_DISCONN; 1770 else 1771 slp->ns_flag |= SLP_NEEDQ; 1772 } 1773 } else { 1774 do { 1775 auio.uio_resid = 1000000000; 1776 flags = MSG_DONTWAIT; 1777 error = soreceive(so, &nam, &auio, &mp, 1778 (struct mbuf **)0, &flags); 1779 if (mp) { 1780 nfs_realign(mp, 10 * NFSX_UNSIGNED); 1781 if (nam) { 1782 m = nam; 1783 m->m_next = mp; 1784 } else 1785 m = mp; 1786 if (slp->ns_recend) 1787 slp->ns_recend->m_nextpkt = m; 1788 else 1789 slp->ns_rec = m; 1790 slp->ns_recend = m; 1791 m->m_nextpkt = (struct mbuf *)0; 1792 } 1793 if (error) { 1794 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 1795 && error != EWOULDBLOCK) { 1796 slp->ns_flag |= SLP_DISCONN; 1797 goto dorecs; 1798 } 1799 } 1800 } while (mp); 1801 } 1802 1803 /* 1804 * Now try and process the request records, non-blocking. 1805 */ 1806 dorecs: 1807 if (waitflag == M_DONTWAIT && 1808 (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN)))) 1809 nfsrv_wakenfsd(slp); 1810 } 1811 1812 /* 1813 * Try and extract an RPC request from the mbuf data list received on a 1814 * stream socket. The "waitflag" argument indicates whether or not it 1815 * can sleep. 1816 */ 1817 nfsrv_getstream(slp, waitflag) 1818 register struct nfssvc_sock *slp; 1819 int waitflag; 1820 { 1821 register struct mbuf *m; 1822 register char *cp1, *cp2; 1823 register int len; 1824 struct mbuf *om, *m2, *recm; 1825 u_long recmark; 1826 1827 if (slp->ns_flag & SLP_GETSTREAM) 1828 panic("nfs getstream"); 1829 slp->ns_flag |= SLP_GETSTREAM; 1830 for (;;) { 1831 if (slp->ns_reclen == 0) { 1832 if (slp->ns_cc < NFSX_UNSIGNED) { 1833 slp->ns_flag &= ~SLP_GETSTREAM; 1834 return (0); 1835 } 1836 m = slp->ns_raw; 1837 if (m->m_len >= NFSX_UNSIGNED) { 1838 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); 1839 m->m_data += NFSX_UNSIGNED; 1840 m->m_len -= NFSX_UNSIGNED; 1841 } else { 1842 cp1 = (caddr_t)&recmark; 1843 cp2 = mtod(m, caddr_t); 1844 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 1845 while (m->m_len == 0) { 1846 m = m->m_next; 1847 cp2 = mtod(m, caddr_t); 1848 } 1849 *cp1++ = *cp2++; 1850 m->m_data++; 1851 m->m_len--; 1852 } 1853 } 1854 slp->ns_cc -= NFSX_UNSIGNED; 1855 slp->ns_reclen = ntohl(recmark) & ~0x80000000; 1856 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { 1857 slp->ns_flag &= ~SLP_GETSTREAM; 1858 return (EPERM); 1859 } 1860 } 1861 1862 /* 1863 * Now get the record part. 1864 */ 1865 if (slp->ns_cc == slp->ns_reclen) { 1866 recm = slp->ns_raw; 1867 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 1868 slp->ns_cc = slp->ns_reclen = 0; 1869 } else if (slp->ns_cc > slp->ns_reclen) { 1870 len = 0; 1871 m = slp->ns_raw; 1872 om = (struct mbuf *)0; 1873 while (len < slp->ns_reclen) { 1874 if ((len + m->m_len) > slp->ns_reclen) { 1875 m2 = m_copym(m, 0, slp->ns_reclen - len, 1876 waitflag); 1877 if (m2) { 1878 if (om) { 1879 om->m_next = m2; 1880 recm = slp->ns_raw; 1881 } else 1882 recm = m2; 1883 m->m_data += slp->ns_reclen - len; 1884 m->m_len -= slp->ns_reclen - len; 1885 len = slp->ns_reclen; 1886 } else { 1887 slp->ns_flag &= ~SLP_GETSTREAM; 1888 return (EWOULDBLOCK); 1889 } 1890 } else if ((len + m->m_len) == slp->ns_reclen) { 1891 om = m; 1892 len += m->m_len; 1893 m = m->m_next; 1894 recm = slp->ns_raw; 1895 om->m_next = (struct mbuf *)0; 1896 } else { 1897 om = m; 1898 len += m->m_len; 1899 m = m->m_next; 1900 } 1901 } 1902 slp->ns_raw = m; 1903 slp->ns_cc -= len; 1904 slp->ns_reclen = 0; 1905 } else { 1906 slp->ns_flag &= ~SLP_GETSTREAM; 1907 return (0); 1908 } 1909 nfs_realign(recm, 10 * NFSX_UNSIGNED); 1910 if (slp->ns_recend) 1911 slp->ns_recend->m_nextpkt = recm; 1912 else 1913 slp->ns_rec = recm; 1914 slp->ns_recend = recm; 1915 } 1916 } 1917 1918 /* 1919 * Parse an RPC header. 1920 */ 1921 nfsrv_dorec(slp, nd) 1922 register struct nfssvc_sock *slp; 1923 register struct nfsd *nd; 1924 { 1925 register struct mbuf *m; 1926 int error; 1927 1928 if ((slp->ns_flag & SLP_VALID) == 0 || 1929 (m = slp->ns_rec) == (struct mbuf *)0) 1930 return (ENOBUFS); 1931 if (slp->ns_rec = m->m_nextpkt) 1932 m->m_nextpkt = (struct mbuf *)0; 1933 else 1934 slp->ns_recend = (struct mbuf *)0; 1935 if (m->m_type == MT_SONAME) { 1936 nd->nd_nam = m; 1937 nd->nd_md = nd->nd_mrep = m->m_next; 1938 m->m_next = (struct mbuf *)0; 1939 } else { 1940 nd->nd_nam = (struct mbuf *)0; 1941 nd->nd_md = nd->nd_mrep = m; 1942 } 1943 nd->nd_dpos = mtod(nd->nd_md, caddr_t); 1944 if (error = nfs_getreq(nd, TRUE)) { 1945 m_freem(nd->nd_nam); 1946 return (error); 1947 } 1948 return (0); 1949 } 1950 1951 /* 1952 * Parse an RPC request 1953 * - verify it 1954 * - fill in the cred struct. 1955 */ 1956 nfs_getreq(nd, has_header) 1957 register struct nfsd *nd; 1958 int has_header; 1959 { 1960 register int len, i; 1961 register u_long *tl; 1962 register long t1; 1963 struct uio uio; 1964 struct iovec iov; 1965 caddr_t dpos, cp2; 1966 u_long nfsvers, auth_type; 1967 int error = 0, nqnfs = 0; 1968 struct mbuf *mrep, *md; 1969 1970 mrep = nd->nd_mrep; 1971 md = nd->nd_md; 1972 dpos = nd->nd_dpos; 1973 if (has_header) { 1974 nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); 1975 nd->nd_retxid = *tl++; 1976 if (*tl++ != rpc_call) { 1977 m_freem(mrep); 1978 return (EBADRPC); 1979 } 1980 } else { 1981 nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); 1982 } 1983 nd->nd_repstat = 0; 1984 if (*tl++ != rpc_vers) { 1985 nd->nd_repstat = ERPCMISMATCH; 1986 nd->nd_procnum = NFSPROC_NOOP; 1987 return (0); 1988 } 1989 nfsvers = nfs_vers; 1990 if (*tl != nfs_prog) { 1991 if (*tl == nqnfs_prog) { 1992 nqnfs++; 1993 nfsvers = nqnfs_vers; 1994 } else { 1995 nd->nd_repstat = EPROGUNAVAIL; 1996 nd->nd_procnum = NFSPROC_NOOP; 1997 return (0); 1998 } 1999 } 2000 tl++; 2001 if (*tl++ != nfsvers) { 2002 nd->nd_repstat = EPROGMISMATCH; 2003 nd->nd_procnum = NFSPROC_NOOP; 2004 return (0); 2005 } 2006 nd->nd_procnum = fxdr_unsigned(u_long, *tl++); 2007 if (nd->nd_procnum == NFSPROC_NULL) 2008 return (0); 2009 if (nd->nd_procnum >= NFS_NPROCS || 2010 (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || 2011 (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { 2012 nd->nd_repstat = EPROCUNAVAIL; 2013 nd->nd_procnum = NFSPROC_NOOP; 2014 return (0); 2015 } 2016 auth_type = *tl++; 2017 len = fxdr_unsigned(int, *tl++); 2018 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2019 m_freem(mrep); 2020 return (EBADRPC); 2021 } 2022 2023 /* 2024 * Handle auth_unix or auth_kerb. 2025 */ 2026 if (auth_type == rpc_auth_unix) { 2027 len = fxdr_unsigned(int, *++tl); 2028 if (len < 0 || len > NFS_MAXNAMLEN) { 2029 m_freem(mrep); 2030 return (EBADRPC); 2031 } 2032 nfsm_adv(nfsm_rndup(len)); 2033 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 2034 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2035 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); 2036 len = fxdr_unsigned(int, *tl); 2037 if (len < 0 || len > RPCAUTH_UNIXGIDS) { 2038 m_freem(mrep); 2039 return (EBADRPC); 2040 } 2041 nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 2042 for (i = 1; i <= len; i++) 2043 if (i < NGROUPS) 2044 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 2045 else 2046 tl++; 2047 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 2048 } else if (auth_type == rpc_auth_kerb) { 2049 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2050 nd->nd_authlen = fxdr_unsigned(int, *tl); 2051 iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen); 2052 if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) { 2053 m_freem(mrep); 2054 return (EBADRPC); 2055 } 2056 uio.uio_offset = 0; 2057 uio.uio_iov = &iov; 2058 uio.uio_iovcnt = 1; 2059 uio.uio_segflg = UIO_SYSSPACE; 2060 iov.iov_base = (caddr_t)nd->nd_authstr; 2061 nfsm_mtouio(&uio, uio.uio_resid); 2062 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 2063 nd->nd_flag |= NFSD_NEEDAUTH; 2064 } 2065 2066 /* 2067 * Do we have any use for the verifier. 2068 * According to the "Remote Procedure Call Protocol Spec." it 2069 * should be AUTH_NULL, but some clients make it AUTH_UNIX? 2070 * For now, just skip over it 2071 */ 2072 len = fxdr_unsigned(int, *++tl); 2073 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2074 m_freem(mrep); 2075 return (EBADRPC); 2076 } 2077 if (len > 0) { 2078 nfsm_adv(nfsm_rndup(len)); 2079 } 2080 2081 /* 2082 * For nqnfs, get piggybacked lease request. 2083 */ 2084 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { 2085 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2086 nd->nd_nqlflag = fxdr_unsigned(int, *tl); 2087 if (nd->nd_nqlflag) { 2088 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2089 nd->nd_duration = fxdr_unsigned(int, *tl); 2090 } else 2091 nd->nd_duration = NQ_MINLEASE; 2092 } else { 2093 nd->nd_nqlflag = NQL_NOVAL; 2094 nd->nd_duration = NQ_MINLEASE; 2095 } 2096 nd->nd_md = md; 2097 nd->nd_dpos = dpos; 2098 return (0); 2099 nfsmout: 2100 return (error); 2101 } 2102 2103 /* 2104 * Search for a sleeping nfsd and wake it up. 2105 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 2106 * running nfsds will go look for the work in the nfssvc_sock list. 2107 */ 2108 void 2109 nfsrv_wakenfsd(slp) 2110 struct nfssvc_sock *slp; 2111 { 2112 register struct nfsd *nd = nfsd_head.nd_next; 2113 2114 if ((slp->ns_flag & SLP_VALID) == 0) 2115 return; 2116 while (nd != (struct nfsd *)&nfsd_head) { 2117 if (nd->nd_flag & NFSD_WAITING) { 2118 nd->nd_flag &= ~NFSD_WAITING; 2119 if (nd->nd_slp) 2120 panic("nfsd wakeup"); 2121 nd->nd_slp = slp; 2122 wakeup((caddr_t)nd); 2123 return; 2124 } 2125 nd = nd->nd_next; 2126 } 2127 slp->ns_flag |= SLP_DOREC; 2128 nfsd_head.nd_flag |= NFSD_CHECKSLP; 2129 } 2130 2131 nfs_msg(p, server, msg) 2132 struct proc *p; 2133 char *server, *msg; 2134 { 2135 tpr_t tpr; 2136 2137 if (p) 2138 tpr = tprintf_open(p); 2139 else 2140 tpr = NULL; 2141 tprintf(tpr, "nfs server %s: %s\n", server, msg); 2142 tprintf_close(tpr); 2143 } 2144