1 /* 2 * Copyright (c) 1989, 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)nfs_socket.c 7.24 (Berkeley) 01/14/92 11 */ 12 13 /* 14 * Socket operations for use by nfs 15 */ 16 17 #include "types.h" 18 #include "param.h" 19 #include "uio.h" 20 #include "proc.h" 21 #include "signal.h" 22 #include "mount.h" 23 #include "kernel.h" 24 #include "malloc.h" 25 #include "mbuf.h" 26 #include "vnode.h" 27 #include "domain.h" 28 #include "protosw.h" 29 #include "socket.h" 30 #include "socketvar.h" 31 #include "syslog.h" 32 #include "tprintf.h" 33 #include "machine/endian.h" 34 #include "netinet/in.h" 35 #include "netinet/tcp.h" 36 #ifdef ISO 37 #include "netiso/iso.h" 38 #endif 39 #include "ufs/ufs/quota.h" 40 #include "ufs/ufs/ufsmount.h" 41 #include "rpcv2.h" 42 #include "nfsv2.h" 43 #include "nfs.h" 44 #include "xdr_subs.h" 45 #include "nfsm_subs.h" 46 #include "nfsmount.h" 47 #include "nfsnode.h" 48 #include "nfsrtt.h" 49 #include "nqnfs.h" 50 51 #include "syslog.h" 52 53 #define TRUE 1 54 #define FALSE 0 55 56 int netnetnet = sizeof (struct netaddrhash); 57 /* 58 * Estimate rto for an nfs rpc sent via. an unreliable datagram. 59 * Use the mean and mean deviation of rtt for the appropriate type of rpc 60 * for the frequent rpcs and a default for the others. 61 * The justification for doing "other" this way is that these rpcs 62 * happen so infrequently that timer est. would probably be stale. 63 * Also, since many of these rpcs are 64 * non-idempotent, a conservative timeout is desired. 65 * getattr, lookup - A+2D 66 * read, write - A+4D 67 * other - nm_timeo 68 */ 69 #define NFS_RTO(n, t) \ 70 ((t) == 0 ? (n)->nm_timeo : \ 71 ((t) < 3 ? \ 72 (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \ 73 ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1))) 74 #define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1] 75 #define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1] 76 /* 77 * External data, mostly RPC constants in XDR form 78 */ 79 extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix, 80 rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred, 81 rpc_auth_kerb; 82 extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers; 83 extern time_t nqnfsstarttime; 84 extern int nonidempotent[NFS_NPROCS]; 85 86 /* 87 * Maps errno values to nfs error numbers. 88 * Use NFSERR_IO as the catch all for ones not specifically defined in 89 * RFC 1094. 90 */ 91 static int nfsrv_errmap[ELAST] = { 92 NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, 93 NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 94 NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, 95 NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, 96 NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 97 NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, 98 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 99 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 100 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 101 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 102 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 103 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 104 NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, 105 NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, 106 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 107 NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, 108 NFSERR_IO, 109 }; 110 111 /* 112 * Defines which timer to use for the procnum. 113 * 0 - default 114 * 1 - getattr 115 * 2 - lookup 116 * 3 - read 117 * 4 - write 118 */ 119 static int proct[NFS_NPROCS] = { 120 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 121 }; 122 123 /* 124 * There is a congestion window for outstanding rpcs maintained per mount 125 * point. The cwnd size is adjusted in roughly the way that: 126 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of 127 * SIGCOMM '88". ACM, August 1988. 128 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout 129 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd 130 * of rpcs is in progress. 131 * (The sent count and cwnd are scaled for integer arith.) 132 * Variants of "slow start" were tried and were found to be too much of a 133 * performance hit (ave. rtt 3 times larger), 134 * I suspect due to the large rtt that nfs rpcs have. 135 */ 136 #define NFS_CWNDSCALE 256 137 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) 138 static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; 139 int nfs_sbwait(); 140 void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock(); 141 void nfs_rcvunlock(), nqnfs_serverd(); 142 struct mbuf *nfsm_rpchead(); 143 int nfsrtton = 0; 144 struct nfsrtt nfsrtt; 145 struct nfsd nfsd_head; 146 147 int nfsrv_null(), 148 nfsrv_getattr(), 149 nfsrv_setattr(), 150 nfsrv_lookup(), 151 nfsrv_readlink(), 152 nfsrv_read(), 153 nfsrv_write(), 154 nfsrv_create(), 155 nfsrv_remove(), 156 nfsrv_rename(), 157 nfsrv_link(), 158 nfsrv_symlink(), 159 nfsrv_mkdir(), 160 nfsrv_rmdir(), 161 nfsrv_readdir(), 162 nfsrv_statfs(), 163 nfsrv_noop(), 164 nqnfsrv_readdirlook(), 165 nqnfsrv_getlease(), 166 nqnfsrv_vacated(); 167 168 int (*nfsrv_procs[NFS_NPROCS])() = { 169 nfsrv_null, 170 nfsrv_getattr, 171 nfsrv_setattr, 172 nfsrv_noop, 173 nfsrv_lookup, 174 nfsrv_readlink, 175 nfsrv_read, 176 nfsrv_noop, 177 nfsrv_write, 178 nfsrv_create, 179 nfsrv_remove, 180 nfsrv_rename, 181 nfsrv_link, 182 nfsrv_symlink, 183 nfsrv_mkdir, 184 nfsrv_rmdir, 185 nfsrv_readdir, 186 nfsrv_statfs, 187 nqnfsrv_readdirlook, 188 nqnfsrv_getlease, 189 nqnfsrv_vacated, 190 }; 191 192 struct nfsreq nfsreqh; 193 194 /* 195 * Initialize sockets and congestion for a new NFS connection. 196 * We do not free the sockaddr if error. 197 */ 198 nfs_connect(nmp, rep) 199 register struct nfsmount *nmp; 200 struct nfsreq *rep; 201 { 202 register struct socket *so; 203 int s, error, rcvreserve, sndreserve; 204 struct mbuf *m; 205 206 nmp->nm_so = (struct socket *)0; 207 if (error = socreate(mtod(nmp->nm_nam, struct sockaddr *)->sa_family, 208 &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto)) 209 goto bad; 210 so = nmp->nm_so; 211 nmp->nm_soflags = so->so_proto->pr_flags; 212 213 /* 214 * Protocols that do not require connections may be optionally left 215 * unconnected for servers that reply from a port other than NFS_PORT. 216 */ 217 if (nmp->nm_flag & NFSMNT_NOCONN) { 218 if (nmp->nm_soflags & PR_CONNREQUIRED) { 219 error = ENOTCONN; 220 goto bad; 221 } 222 } else { 223 if (error = soconnect(so, nmp->nm_nam)) 224 goto bad; 225 226 /* 227 * Wait for the connection to complete. Cribbed from the 228 * connect system call but with the wait timing out so 229 * that interruptible mounts don't hang here for a long time. 230 */ 231 s = splnet(); 232 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 233 (void) tsleep((caddr_t)&so->so_timeo, PSOCK, 234 "nfscon", 2 * hz); 235 if ((so->so_state & SS_ISCONNECTING) && 236 so->so_error == 0 && rep && 237 (error = nfs_sigintr(nmp, rep, rep->r_procp))) { 238 so->so_state &= ~SS_ISCONNECTING; 239 splx(s); 240 goto bad; 241 } 242 } 243 if (so->so_error) { 244 error = so->so_error; 245 so->so_error = 0; 246 splx(s); 247 goto bad; 248 } 249 splx(s); 250 } 251 if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { 252 so->so_rcv.sb_timeo = (5 * hz); 253 so->so_snd.sb_timeo = (5 * hz); 254 } else { 255 so->so_rcv.sb_timeo = 0; 256 so->so_snd.sb_timeo = 0; 257 } 258 if (nmp->nm_sotype == SOCK_DGRAM) { 259 sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR; 260 rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR; 261 } else if (nmp->nm_sotype == SOCK_SEQPACKET) { 262 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2; 263 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2; 264 } else { 265 if (nmp->nm_sotype != SOCK_STREAM) 266 panic("nfscon sotype"); 267 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 268 MGET(m, M_WAIT, MT_SOOPTS); 269 *mtod(m, int *) = 1; 270 m->m_len = sizeof(int); 271 sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m); 272 } 273 if (so->so_proto->pr_protocol == IPPROTO_TCP) { 274 MGET(m, M_WAIT, MT_SOOPTS); 275 *mtod(m, int *) = 1; 276 m->m_len = sizeof(int); 277 sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m); 278 } 279 sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long)) 280 * 2; 281 rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long)) 282 * 2; 283 } 284 if (error = soreserve(so, sndreserve, rcvreserve)) 285 goto bad; 286 so->so_rcv.sb_flags |= SB_NOINTR; 287 so->so_snd.sb_flags |= SB_NOINTR; 288 289 /* Initialize other non-zero congestion variables */ 290 nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] = 291 nmp->nm_srtt[4] = (NFS_TIMEO << 3); 292 nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] = 293 nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0; 294 nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ 295 nmp->nm_sent = 0; 296 nmp->nm_timeouts = 0; 297 return (0); 298 299 bad: 300 nfs_disconnect(nmp); 301 return (error); 302 } 303 304 /* 305 * Reconnect routine: 306 * Called when a connection is broken on a reliable protocol. 307 * - clean up the old socket 308 * - nfs_connect() again 309 * - set R_MUSTRESEND for all outstanding requests on mount point 310 * If this fails the mount point is DEAD! 311 * nb: Must be called with the nfs_sndlock() set on the mount point. 312 */ 313 nfs_reconnect(rep) 314 register struct nfsreq *rep; 315 { 316 register struct nfsreq *rp; 317 register struct nfsmount *nmp = rep->r_nmp; 318 int error; 319 320 nfs_disconnect(nmp); 321 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 322 "trying reconnect"); 323 while (error = nfs_connect(nmp, rep)) { 324 if (error == EINTR || error == ERESTART) 325 return (EINTR); 326 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); 327 } 328 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 329 "reconnected"); 330 331 /* 332 * Loop through outstanding request list and fix up all requests 333 * on old socket. 334 */ 335 rp = nfsreqh.r_next; 336 while (rp != &nfsreqh) { 337 if (rp->r_nmp == nmp) 338 rp->r_flags |= R_MUSTRESEND; 339 rp = rp->r_next; 340 } 341 return (0); 342 } 343 344 /* 345 * NFS disconnect. Clean up and unlink. 346 */ 347 void 348 nfs_disconnect(nmp) 349 register struct nfsmount *nmp; 350 { 351 register struct socket *so; 352 353 if (nmp->nm_so) { 354 so = nmp->nm_so; 355 nmp->nm_so = (struct socket *)0; 356 soshutdown(so, 2); 357 soclose(so); 358 } 359 } 360 361 /* 362 * This is the nfs send routine. For connection based socket types, it 363 * must be called with an nfs_sndlock() on the socket. 364 * "rep == NULL" indicates that it has been called from a server. 365 * For the client side: 366 * - return EINTR if the RPC is terminated, 0 otherwise 367 * - set R_MUSTRESEND if the send fails for any reason 368 * - do any cleanup required by recoverable socket errors (???) 369 * For the server side: 370 * - return EINTR or ERESTART if interrupted by a signal 371 * - return EPIPE if a connection is lost for connection based sockets (TCP...) 372 * - do any cleanup required by recoverable socket errors (???) 373 */ 374 nfs_send(so, nam, top, rep) 375 register struct socket *so; 376 struct mbuf *nam; 377 register struct mbuf *top; 378 struct nfsreq *rep; 379 { 380 struct mbuf *sendnam; 381 int error, soflags, flags; 382 383 if (rep) { 384 if (rep->r_flags & R_SOFTTERM) { 385 m_freem(top); 386 return (EINTR); 387 } 388 if ((so = rep->r_nmp->nm_so) == NULL) { 389 rep->r_flags |= R_MUSTRESEND; 390 m_freem(top); 391 return (0); 392 } 393 rep->r_flags &= ~R_MUSTRESEND; 394 soflags = rep->r_nmp->nm_soflags; 395 } else 396 soflags = so->so_proto->pr_flags; 397 if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED)) 398 sendnam = (struct mbuf *)0; 399 else 400 sendnam = nam; 401 if (so->so_type == SOCK_SEQPACKET) 402 flags = MSG_EOR; 403 else 404 flags = 0; 405 406 error = sosend(so, sendnam, (struct uio *)0, top, 407 (struct mbuf *)0, flags); 408 if(error) printf("nfssnd err=%d\n",error); 409 if (error) { 410 if (rep) { 411 /* 412 * Deal with errors for the client side. 413 */ 414 if (rep->r_flags & R_SOFTTERM) 415 error = EINTR; 416 else 417 rep->r_flags |= R_MUSTRESEND; 418 } 419 420 /* 421 * Handle any recoverable (soft) socket errors here. (???) 422 */ 423 if (error != EINTR && error != ERESTART && 424 error != EWOULDBLOCK && error != EPIPE) 425 error = 0; 426 } 427 return (error); 428 } 429 430 /* 431 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all 432 * done by soreceive(), but for SOCK_STREAM we must deal with the Record 433 * Mark and consolidate the data into a new mbuf list. 434 * nb: Sometimes TCP passes the data up to soreceive() in long lists of 435 * small mbufs. 436 * For SOCK_STREAM we must be very careful to read an entire record once 437 * we have read any of it, even if the system call has been interrupted. 438 */ 439 nfs_receive(rep, aname, mp) 440 register struct nfsreq *rep; 441 struct mbuf **aname; 442 struct mbuf **mp; 443 { 444 register struct socket *so; 445 struct uio auio; 446 struct iovec aio; 447 register struct mbuf *m; 448 struct mbuf *control; 449 u_long len; 450 struct mbuf **getnam; 451 int error, sotype, rcvflg; 452 453 /* 454 * Set up arguments for soreceive() 455 */ 456 *mp = (struct mbuf *)0; 457 *aname = (struct mbuf *)0; 458 sotype = rep->r_nmp->nm_sotype; 459 460 /* 461 * For reliable protocols, lock against other senders/receivers 462 * in case a reconnect is necessary. 463 * For SOCK_STREAM, first get the Record Mark to find out how much 464 * more there is to get. 465 * We must lock the socket against other receivers 466 * until we have an entire rpc request/reply. 467 */ 468 if (sotype != SOCK_DGRAM) { 469 if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep)) 470 return (error); 471 tryagain: 472 /* 473 * Check for fatal errors and resending request. 474 */ 475 /* 476 * Ugh: If a reconnect attempt just happened, nm_so 477 * would have changed. NULL indicates a failed 478 * attempt that has essentially shut down this 479 * mount point. 480 */ 481 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) { 482 nfs_sndunlock(&rep->r_nmp->nm_flag); 483 return (EINTR); 484 } 485 if ((so = rep->r_nmp->nm_so) == NULL) { 486 if (error = nfs_reconnect(rep)) { 487 nfs_sndunlock(&rep->r_nmp->nm_flag); 488 return (error); 489 } 490 goto tryagain; 491 } 492 while (rep->r_flags & R_MUSTRESEND) { 493 m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT); 494 nfsstats.rpcretries++; 495 if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) { 496 if (error == EINTR || error == ERESTART || 497 (error = nfs_reconnect(rep))) { 498 nfs_sndunlock(&rep->r_nmp->nm_flag); 499 return (error); 500 } 501 goto tryagain; 502 } 503 } 504 nfs_sndunlock(&rep->r_nmp->nm_flag); 505 if (sotype == SOCK_STREAM) { 506 aio.iov_base = (caddr_t) &len; 507 aio.iov_len = sizeof(u_long); 508 auio.uio_iov = &aio; 509 auio.uio_iovcnt = 1; 510 auio.uio_segflg = UIO_SYSSPACE; 511 auio.uio_rw = UIO_READ; 512 auio.uio_offset = 0; 513 auio.uio_resid = sizeof(u_long); 514 do { 515 rcvflg = MSG_WAITALL; 516 error = soreceive(so, (struct mbuf **)0, &auio, 517 (struct mbuf **)0, (struct mbuf **)0, &rcvflg); 518 if (error == EWOULDBLOCK && rep) { 519 if (rep->r_flags & R_SOFTTERM) 520 return (EINTR); 521 } 522 } while (error == EWOULDBLOCK); 523 if (!error && auio.uio_resid > 0) { 524 if (rep) 525 log(LOG_INFO, 526 "short receive (%d/%d) from nfs server %s\n", 527 sizeof(u_long) - auio.uio_resid, 528 sizeof(u_long), 529 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 530 error = EPIPE; 531 } 532 if (error) 533 goto errout; 534 len = ntohl(len) & ~0x80000000; 535 /* 536 * This is SERIOUS! We are out of sync with the sender 537 * and forcing a disconnect/reconnect is all I can do. 538 */ 539 if (len > NFS_MAXPACKET) { 540 if (rep) 541 log(LOG_ERR, "%s (%d) from nfs server %s\n", 542 "impossible packet length", 543 len, 544 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 545 error = EFBIG; 546 goto errout; 547 } 548 auio.uio_resid = len; 549 do { 550 rcvflg = MSG_WAITALL; 551 error = soreceive(so, (struct mbuf **)0, 552 &auio, mp, (struct mbuf **)0, &rcvflg); 553 } while (error == EWOULDBLOCK || error == EINTR || 554 error == ERESTART); 555 if (!error && auio.uio_resid > 0) { 556 if (rep) 557 log(LOG_INFO, 558 "short receive (%d/%d) from nfs server %s\n", 559 len - auio.uio_resid, len, 560 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 561 error = EPIPE; 562 } 563 } else { 564 /* 565 * NB: Since uio_resid is big, MSG_WAITALL is ignored 566 * and soreceive() will return when it has either a 567 * control msg or a data msg. 568 * We have no use for control msg., but must grab them 569 * and then throw them away so we know what is going 570 * on. 571 */ 572 auio.uio_resid = len = 100000000; /* Anything Big */ 573 do { 574 rcvflg = 0; 575 error = soreceive(so, (struct mbuf **)0, 576 &auio, mp, &control, &rcvflg); 577 if (control) 578 m_freem(control); 579 if (error == EWOULDBLOCK && rep) { 580 if (rep->r_flags & R_SOFTTERM) 581 return (EINTR); 582 } 583 } while (error == EWOULDBLOCK || 584 (!error && *mp == NULL && control)); 585 if ((rcvflg & MSG_EOR) == 0) 586 printf("Egad!!\n"); 587 if (!error && *mp == NULL) 588 error = EPIPE; 589 len -= auio.uio_resid; 590 } 591 errout: 592 if (error && error != EINTR && error != ERESTART) { 593 m_freem(*mp); 594 *mp = (struct mbuf *)0; 595 if (error != EPIPE && rep) 596 log(LOG_INFO, 597 "receive error %d from nfs server %s\n", 598 error, 599 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname); 600 error = nfs_sndlock(&rep->r_nmp->nm_flag, rep); 601 if (!error) 602 error = nfs_reconnect(rep); 603 if (!error) 604 goto tryagain; 605 } 606 } else { 607 if ((so = rep->r_nmp->nm_so) == NULL) 608 return (EACCES); 609 if (so->so_state & SS_ISCONNECTED) 610 getnam = (struct mbuf **)0; 611 else 612 getnam = aname; 613 auio.uio_resid = len = 1000000; 614 do { 615 rcvflg = 0; 616 error = soreceive(so, getnam, &auio, mp, 617 (struct mbuf **)0, &rcvflg); 618 if (error == EWOULDBLOCK && 619 (rep->r_flags & R_SOFTTERM)) 620 return (EINTR); 621 } while (error == EWOULDBLOCK); 622 len -= auio.uio_resid; 623 } 624 if (error) { 625 m_freem(*mp); 626 *mp = (struct mbuf *)0; 627 } 628 /* 629 * Search for any mbufs that are not a multiple of 4 bytes long 630 * or with m_data not longword aligned. 631 * These could cause pointer alignment problems, so copy them to 632 * well aligned mbufs. 633 */ 634 nfs_realign(*mp, 5 * NFSX_UNSIGNED); 635 return (error); 636 } 637 638 /* 639 * Implement receipt of reply on a socket. 640 * We must search through the list of received datagrams matching them 641 * with outstanding requests using the xid, until ours is found. 642 */ 643 /* ARGSUSED */ 644 nfs_reply(myrep) 645 struct nfsreq *myrep; 646 { 647 register struct nfsreq *rep; 648 register struct nfsmount *nmp = myrep->r_nmp; 649 register long t1; 650 struct mbuf *mrep, *nam, *md; 651 u_long rxid, *tl; 652 caddr_t dpos, cp2; 653 int error; 654 655 /* 656 * Loop around until we get our own reply 657 */ 658 for (;;) { 659 /* 660 * Lock against other receivers so that I don't get stuck in 661 * sbwait() after someone else has received my reply for me. 662 * Also necessary for connection based protocols to avoid 663 * race conditions during a reconnect. 664 */ 665 if (error = nfs_rcvlock(myrep)) 666 return (error); 667 /* Already received, bye bye */ 668 if (myrep->r_mrep != NULL) { 669 nfs_rcvunlock(&nmp->nm_flag); 670 return (0); 671 } 672 /* 673 * Get the next Rpc reply off the socket 674 */ 675 error = nfs_receive(myrep, &nam, &mrep); 676 nfs_rcvunlock(&nmp->nm_flag); 677 if (error) printf("rcv err=%d\n",error); 678 if (error) { 679 680 /* 681 * Ignore routing errors on connectionless protocols?? 682 */ 683 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) { 684 nmp->nm_so->so_error = 0; 685 continue; 686 } 687 return (error); 688 } 689 if (nam) 690 m_freem(nam); 691 692 /* 693 * Get the xid and check that it is an rpc reply 694 */ 695 md = mrep; 696 dpos = mtod(md, caddr_t); 697 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 698 rxid = *tl++; 699 if (*tl != rpc_reply) { 700 if (nmp->nm_flag & NFSMNT_NQNFS) { 701 if (nqnfs_callback(nmp, mrep, md, dpos)) 702 nfsstats.rpcinvalid++; 703 } else { 704 nfsstats.rpcinvalid++; 705 m_freem(mrep); 706 } 707 nfsmout: 708 continue; 709 } 710 711 /* 712 * Loop through the request list to match up the reply 713 * Iff no match, just drop the datagram 714 */ 715 rep = nfsreqh.r_next; 716 while (rep != &nfsreqh) { 717 if (rep->r_mrep == NULL && rxid == rep->r_xid) { 718 /* Found it.. */ 719 rep->r_mrep = mrep; 720 rep->r_md = md; 721 rep->r_dpos = dpos; 722 if (nfsrtton) { 723 struct rttl *rt; 724 725 rt = &nfsrtt.rttl[nfsrtt.pos]; 726 rt->proc = rep->r_procnum; 727 rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]); 728 rt->sent = nmp->nm_sent; 729 rt->cwnd = nmp->nm_cwnd; 730 rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1]; 731 rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1]; 732 rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid; 733 rt->tstamp = time; 734 if (rep->r_flags & R_TIMING) 735 rt->rtt = rep->r_rtt; 736 else 737 rt->rtt = 1000000; 738 nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; 739 } 740 /* 741 * Update congestion window. 742 * Do the additive increase of 743 * one rpc/rtt. 744 */ 745 if (nmp->nm_cwnd <= nmp->nm_sent) { 746 nmp->nm_cwnd += 747 (NFS_CWNDSCALE * NFS_CWNDSCALE + 748 (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd; 749 if (nmp->nm_cwnd > NFS_MAXCWND) 750 nmp->nm_cwnd = NFS_MAXCWND; 751 } 752 nmp->nm_sent -= NFS_CWNDSCALE; 753 /* 754 * Update rtt using a gain of 0.125 on the mean 755 * and a gain of 0.25 on the deviation. 756 */ 757 if (rep->r_flags & R_TIMING) { 758 /* 759 * Since the timer resolution of 760 * NFS_HZ is so course, it can often 761 * result in r_rtt == 0. Since 762 * r_rtt == N means that the actual 763 * rtt is between N+dt and N+2-dt ticks, 764 * add 1. 765 */ 766 t1 = rep->r_rtt + 1; 767 t1 -= (NFS_SRTT(rep) >> 3); 768 NFS_SRTT(rep) += t1; 769 if (t1 < 0) 770 t1 = -t1; 771 t1 -= (NFS_SDRTT(rep) >> 2); 772 NFS_SDRTT(rep) += t1; 773 } 774 nmp->nm_timeouts = 0; 775 break; 776 } 777 rep = rep->r_next; 778 } 779 /* 780 * If not matched to a request, drop it. 781 * If it's mine, get out. 782 */ 783 if (rep == &nfsreqh) { 784 nfsstats.rpcunexpected++; 785 m_freem(mrep); 786 } else if (rep == myrep) 787 return (0); 788 } 789 } 790 791 /* 792 * nfs_request - goes something like this 793 * - fill in request struct 794 * - links it into list 795 * - calls nfs_send() for first transmit 796 * - calls nfs_receive() to get reply 797 * - break down rpc header and return with nfs reply pointed to 798 * by mrep or error 799 * nb: always frees up mreq mbuf list 800 */ 801 nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) 802 struct vnode *vp; 803 struct mbuf *mrest; 804 int procnum; 805 struct proc *procp; 806 struct ucred *cred; 807 struct mbuf **mrp; 808 struct mbuf **mdp; 809 caddr_t *dposp; 810 { 811 register struct mbuf *m, *mrep; 812 register struct nfsreq *rep; 813 register u_long *tl; 814 register int i; 815 struct nfsmount *nmp; 816 struct mbuf *md, *mheadend; 817 struct nfsreq *reph; 818 struct nfsnode *tp, *np; 819 time_t reqtime, waituntil; 820 caddr_t dpos, cp2; 821 int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type; 822 int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0; 823 u_long xid; 824 char *auth_str; 825 826 nmp = VFSTONFS(vp->v_mount); 827 MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); 828 rep->r_nmp = nmp; 829 rep->r_vp = vp; 830 rep->r_procp = procp; 831 rep->r_procnum = procnum; 832 i = 0; 833 m = mrest; 834 while (m) { 835 i += m->m_len; 836 m = m->m_next; 837 } 838 mrest_len = i; 839 840 /* 841 * Get the RPC header with authorization. 842 */ 843 kerbauth: 844 auth_str = (char *)0; 845 if (nmp->nm_flag & NFSMNT_KERB) { 846 if (failed_auth) { 847 error = nfs_getauth(nmp, rep, cred, &auth_type, 848 &auth_str, &auth_len); 849 if (error) { 850 free((caddr_t)rep, M_NFSREQ); 851 m_freem(mrest); 852 return (error); 853 } 854 } else { 855 auth_type = RPCAUTH_UNIX; 856 auth_len = 5 * NFSX_UNSIGNED; 857 } 858 } else { 859 auth_type = RPCAUTH_UNIX; 860 auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ? 861 nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) + 862 5 * NFSX_UNSIGNED; 863 } 864 m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum, 865 auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid); 866 if (auth_str) 867 free(auth_str, M_TEMP); 868 869 /* 870 * For stream protocols, insert a Sun RPC Record Mark. 871 */ 872 if (nmp->nm_sotype == SOCK_STREAM) { 873 M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); 874 *mtod(m, u_long *) = htonl(0x80000000 | 875 (m->m_pkthdr.len - NFSX_UNSIGNED)); 876 } 877 rep->r_mreq = m; 878 rep->r_xid = xid; 879 tryagain: 880 if (nmp->nm_flag & NFSMNT_SOFT) 881 rep->r_retry = nmp->nm_retry; 882 else 883 rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */ 884 rep->r_rtt = rep->r_rexmit = 0; 885 if (proct[procnum] > 0) 886 rep->r_flags = R_TIMING; 887 else 888 rep->r_flags = 0; 889 rep->r_mrep = NULL; 890 891 /* 892 * Do the client side RPC. 893 */ 894 nfsstats.rpcrequests++; 895 /* 896 * Chain request into list of outstanding requests. Be sure 897 * to put it LAST so timer finds oldest requests first. 898 */ 899 s = splsoftclock(); 900 reph = &nfsreqh; 901 reph->r_prev->r_next = rep; 902 rep->r_prev = reph->r_prev; 903 reph->r_prev = rep; 904 rep->r_next = reph; 905 906 /* Get send time for nqnfs */ 907 reqtime = time.tv_sec; 908 909 /* 910 * If backing off another request or avoiding congestion, don't 911 * send this one now but let timer do it. If not timing a request, 912 * do it now. 913 */ 914 if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM || 915 (nmp->nm_flag & NFSMNT_DUMBTIMR) || 916 nmp->nm_sent < nmp->nm_cwnd)) { 917 splx(s); 918 if (nmp->nm_soflags & PR_CONNREQUIRED) 919 error = nfs_sndlock(&nmp->nm_flag, rep); 920 if (!error) { 921 m = m_copym(m, 0, M_COPYALL, M_WAIT); 922 error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep); 923 if (nmp->nm_soflags & PR_CONNREQUIRED) 924 nfs_sndunlock(&nmp->nm_flag); 925 } 926 if (!error && (rep->r_flags & R_MUSTRESEND) == 0) { 927 nmp->nm_sent += NFS_CWNDSCALE; 928 rep->r_flags |= R_SENT; 929 } 930 } else { 931 splx(s); 932 rep->r_rtt = -1; 933 } 934 935 /* 936 * Wait for the reply from our send or the timer's. 937 */ 938 if (!error) 939 error = nfs_reply(rep); 940 941 /* 942 * RPC done, unlink the request. 943 */ 944 s = splsoftclock(); 945 rep->r_prev->r_next = rep->r_next; 946 rep->r_next->r_prev = rep->r_prev; 947 splx(s); 948 949 /* 950 * If there was a successful reply and a tprintf msg. 951 * tprintf a response. 952 */ 953 if (!error && (rep->r_flags & R_TPRINTFMSG)) 954 nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname, 955 "is alive again"); 956 mrep = rep->r_mrep; 957 md = rep->r_md; 958 dpos = rep->r_dpos; 959 if (error) { 960 m_freem(rep->r_mreq); 961 free((caddr_t)rep, M_NFSREQ); 962 return (error); 963 } 964 965 /* 966 * break down the rpc header and check if ok 967 */ 968 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 969 if (*tl++ == rpc_msgdenied) { 970 if (*tl == rpc_mismatch) 971 error = EOPNOTSUPP; 972 else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) { 973 if (*tl == rpc_rejectedcred && failed_auth == 0) { 974 failed_auth++; 975 mheadend->m_next = (struct mbuf *)0; 976 m_freem(mrep); 977 m_freem(rep->r_mreq); 978 goto kerbauth; 979 } else 980 error = EAUTH; 981 } else 982 error = EACCES; 983 m_freem(mrep); 984 m_freem(rep->r_mreq); 985 free((caddr_t)rep, M_NFSREQ); 986 return (error); 987 } 988 989 /* 990 * skip over the auth_verf, someday we may want to cache auth_short's 991 * for nfs_reqhead(), but for now just dump it 992 */ 993 if (*++tl != 0) { 994 i = nfsm_rndup(fxdr_unsigned(long, *tl)); 995 nfsm_adv(i); 996 } 997 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 998 /* 0 == ok */ 999 if (*tl == 0) { 1000 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 1001 if (*tl != 0) { 1002 error = fxdr_unsigned(int, *tl); 1003 m_freem(mrep); 1004 if ((nmp->nm_flag & NFSMNT_NQNFS) && 1005 error == NQNFS_TRYLATER) { 1006 error = 0; 1007 waituntil = time.tv_sec + trylater_delay; 1008 while (time.tv_sec < waituntil) 1009 (void) tsleep((caddr_t)&lbolt, 1010 PSOCK, "nqnfstry", 0); 1011 trylater_delay *= nfs_backoff[trylater_cnt]; 1012 if (trylater_cnt < 7) 1013 trylater_cnt++; 1014 goto tryagain; 1015 } 1016 m_freem(rep->r_mreq); 1017 free((caddr_t)rep, M_NFSREQ); 1018 return (error); 1019 } 1020 1021 /* 1022 * For nqnfs, get any lease in reply 1023 */ 1024 if (nmp->nm_flag & NFSMNT_NQNFS) { 1025 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 1026 if (*tl) { 1027 np = VTONFS(vp); 1028 nqlflag = fxdr_unsigned(int, *tl); 1029 nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED); 1030 cachable = fxdr_unsigned(int, *tl++); 1031 reqtime += fxdr_unsigned(int, *tl++); 1032 if (reqtime > time.tv_sec) { 1033 if (np->n_tnext) { 1034 if (np->n_tnext == (struct nfsnode *)nmp) 1035 nmp->nm_tprev = np->n_tprev; 1036 else 1037 np->n_tnext->n_tprev = np->n_tprev; 1038 if (np->n_tprev == (struct nfsnode *)nmp) 1039 nmp->nm_tnext = np->n_tnext; 1040 else 1041 np->n_tprev->n_tnext = np->n_tnext; 1042 if (nqlflag == NQL_WRITE) 1043 np->n_flag |= NQNFSWRITE; 1044 } else if (nqlflag == NQL_READ) 1045 np->n_flag &= ~NQNFSWRITE; 1046 else 1047 np->n_flag |= NQNFSWRITE; 1048 if (cachable) 1049 np->n_flag &= ~NQNFSNONCACHE; 1050 else 1051 np->n_flag |= NQNFSNONCACHE; 1052 np->n_expiry = reqtime; 1053 fxdr_hyper(tl, &np->n_lrev); 1054 tp = nmp->nm_tprev; 1055 while (tp != (struct nfsnode *)nmp && 1056 tp->n_expiry > np->n_expiry) 1057 tp = tp->n_tprev; 1058 if (tp == (struct nfsnode *)nmp) { 1059 np->n_tnext = nmp->nm_tnext; 1060 nmp->nm_tnext = np; 1061 } else { 1062 np->n_tnext = tp->n_tnext; 1063 tp->n_tnext = np; 1064 } 1065 np->n_tprev = tp; 1066 if (np->n_tnext == (struct nfsnode *)nmp) 1067 nmp->nm_tprev = np; 1068 else 1069 np->n_tnext->n_tprev = np; 1070 } 1071 } 1072 } 1073 *mrp = mrep; 1074 *mdp = md; 1075 *dposp = dpos; 1076 m_freem(rep->r_mreq); 1077 FREE((caddr_t)rep, M_NFSREQ); 1078 return (0); 1079 } 1080 m_freem(mrep); 1081 m_freem(rep->r_mreq); 1082 free((caddr_t)rep, M_NFSREQ); 1083 error = EPROTONOSUPPORT; 1084 nfsmout: 1085 return (error); 1086 } 1087 1088 /* 1089 * Generate the rpc reply header 1090 * siz arg. is used to decide if adding a cluster is worthwhile 1091 */ 1092 nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp) 1093 int siz; 1094 struct nfsd *nd; 1095 int err; 1096 int cache; 1097 u_quad_t *frev; 1098 struct mbuf **mrq; 1099 struct mbuf **mbp; 1100 caddr_t *bposp; 1101 { 1102 register u_long *tl; 1103 register struct mbuf *mreq; 1104 caddr_t bpos; 1105 struct mbuf *mb, *mb2; 1106 1107 MGETHDR(mreq, M_WAIT, MT_DATA); 1108 mb = mreq; 1109 /* 1110 * If this is a big reply, use a cluster else 1111 * try and leave leading space for the lower level headers. 1112 */ 1113 siz += RPC_REPLYSIZ; 1114 if (siz >= MINCLSIZE) { 1115 MCLGET(mreq, M_WAIT); 1116 } else 1117 mreq->m_data += max_hdr; 1118 tl = mtod(mreq, u_long *); 1119 mreq->m_len = 6*NFSX_UNSIGNED; 1120 bpos = ((caddr_t)tl)+mreq->m_len; 1121 *tl++ = nd->nd_retxid; 1122 *tl++ = rpc_reply; 1123 if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) { 1124 *tl++ = rpc_msgdenied; 1125 if (err == NQNFS_AUTHERR) { 1126 *tl++ = rpc_autherr; 1127 *tl = rpc_rejectedcred; 1128 mreq->m_len -= NFSX_UNSIGNED; 1129 bpos -= NFSX_UNSIGNED; 1130 } else { 1131 *tl++ = rpc_mismatch; 1132 *tl++ = txdr_unsigned(2); 1133 *tl = txdr_unsigned(2); 1134 } 1135 } else { 1136 *tl++ = rpc_msgaccepted; 1137 *tl++ = 0; 1138 *tl++ = 0; 1139 switch (err) { 1140 case EPROGUNAVAIL: 1141 *tl = txdr_unsigned(RPC_PROGUNAVAIL); 1142 break; 1143 case EPROGMISMATCH: 1144 *tl = txdr_unsigned(RPC_PROGMISMATCH); 1145 nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); 1146 *tl++ = txdr_unsigned(2); 1147 *tl = txdr_unsigned(2); /* someday 3 */ 1148 break; 1149 case EPROCUNAVAIL: 1150 *tl = txdr_unsigned(RPC_PROCUNAVAIL); 1151 break; 1152 default: 1153 *tl = 0; 1154 if (err != VNOVAL) { 1155 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1156 if (err) 1157 *tl = txdr_unsigned(nfsrv_errmap[err - 1]); 1158 else 1159 *tl = 0; 1160 } 1161 break; 1162 }; 1163 } 1164 1165 /* 1166 * For nqnfs, piggyback lease as requested. 1167 */ 1168 if (nd->nd_nqlflag != NQL_NOVAL && err == 0) { 1169 if (nd->nd_nqlflag) { 1170 nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED); 1171 *tl++ = txdr_unsigned(nd->nd_nqlflag); 1172 *tl++ = txdr_unsigned(cache); 1173 *tl++ = txdr_unsigned(nd->nd_duration); 1174 txdr_hyper(frev, tl); 1175 } else { 1176 if (nd->nd_nqlflag != 0) 1177 panic("nqreph"); 1178 nfsm_build(tl, u_long *, NFSX_UNSIGNED); 1179 *tl = 0; 1180 } 1181 } 1182 *mrq = mreq; 1183 *mbp = mb; 1184 *bposp = bpos; 1185 if (err != 0 && err != VNOVAL) 1186 nfsstats.srvrpc_errs++; 1187 return (0); 1188 } 1189 1190 /* 1191 * Nfs timer routine 1192 * Scan the nfsreq list and retranmit any requests that have timed out 1193 * To avoid retransmission attempts on STREAM sockets (in the future) make 1194 * sure to set the r_retry field to 0 (implies nm_retry == 0). 1195 */ 1196 nfs_timer() 1197 { 1198 register struct nfsreq *rep; 1199 register struct mbuf *m; 1200 register struct socket *so; 1201 register struct nfsmount *nmp; 1202 register int timeo; 1203 static long lasttime = 0; 1204 int s, error; 1205 1206 s = splnet(); 1207 for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) { 1208 nmp = rep->r_nmp; 1209 if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) 1210 continue; 1211 if (nfs_sigintr(nmp, rep, rep->r_procp)) { 1212 rep->r_flags |= R_SOFTTERM; 1213 continue; 1214 } 1215 if (rep->r_rtt >= 0) { 1216 rep->r_rtt++; 1217 if (nmp->nm_flag & NFSMNT_DUMBTIMR) 1218 timeo = nmp->nm_timeo; 1219 else 1220 timeo = NFS_RTO(nmp, proct[rep->r_procnum]); 1221 if (nmp->nm_timeouts > 0) 1222 timeo *= nfs_backoff[nmp->nm_timeouts - 1]; 1223 if (rep->r_rtt <= timeo) 1224 continue; 1225 if (nmp->nm_timeouts < 8) 1226 nmp->nm_timeouts++; 1227 } 1228 /* 1229 * Check for server not responding 1230 */ 1231 if ((rep->r_flags & R_TPRINTFMSG) == 0 && 1232 rep->r_rexmit > nmp->nm_deadthresh) { 1233 nfs_msg(rep->r_procp, 1234 nmp->nm_mountp->mnt_stat.f_mntfromname, 1235 "not responding"); 1236 rep->r_flags |= R_TPRINTFMSG; 1237 } 1238 if (rep->r_rexmit >= rep->r_retry) { /* too many */ 1239 nfsstats.rpctimeouts++; 1240 rep->r_flags |= R_SOFTTERM; 1241 continue; 1242 } 1243 if (nmp->nm_sotype != SOCK_DGRAM) { 1244 if (++rep->r_rexmit > NFS_MAXREXMIT) 1245 rep->r_rexmit = NFS_MAXREXMIT; 1246 continue; 1247 } 1248 if ((so = nmp->nm_so) == NULL) 1249 continue; 1250 1251 /* 1252 * If there is enough space and the window allows.. 1253 * Resend it 1254 * Set r_rtt to -1 in case we fail to send it now. 1255 */ 1256 rep->r_rtt = -1; 1257 if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && 1258 ((nmp->nm_flag & NFSMNT_DUMBTIMR) || 1259 (rep->r_flags & R_SENT) || 1260 nmp->nm_sent < nmp->nm_cwnd) && 1261 (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){ 1262 if ((nmp->nm_flag & NFSMNT_NOCONN) == 0) 1263 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1264 (struct mbuf *)0, (struct mbuf *)0); 1265 else 1266 error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m, 1267 nmp->nm_nam, (struct mbuf *)0); 1268 if (error) { 1269 if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) 1270 so->so_error = 0; 1271 } else { 1272 /* 1273 * Iff first send, start timing 1274 * else turn timing off, backoff timer 1275 * and divide congestion window by 2. 1276 */ 1277 if (rep->r_flags & R_SENT) { 1278 rep->r_flags &= ~R_TIMING; 1279 if (++rep->r_rexmit > NFS_MAXREXMIT) 1280 rep->r_rexmit = NFS_MAXREXMIT; 1281 nmp->nm_cwnd >>= 1; 1282 if (nmp->nm_cwnd < NFS_CWNDSCALE) 1283 nmp->nm_cwnd = NFS_CWNDSCALE; 1284 nfsstats.rpcretries++; 1285 } else { 1286 rep->r_flags |= R_SENT; 1287 nmp->nm_sent += NFS_CWNDSCALE; 1288 } 1289 rep->r_rtt = 0; 1290 } 1291 } 1292 } 1293 1294 /* 1295 * Call the nqnfs server timer once a second to handle leases. 1296 */ 1297 if (lasttime != time.tv_sec) { 1298 lasttime = time.tv_sec; 1299 nqnfs_serverd(); 1300 } 1301 splx(s); 1302 timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ); 1303 } 1304 1305 /* 1306 * Test for a termination condition pending on the process. 1307 * This is used for NFSMNT_INT mounts. 1308 */ 1309 nfs_sigintr(nmp, rep, p) 1310 struct nfsmount *nmp; 1311 struct nfsreq *rep; 1312 register struct proc *p; 1313 { 1314 1315 if (rep && (rep->r_flags & R_SOFTTERM)) 1316 return (EINTR); 1317 if (!(nmp->nm_flag & NFSMNT_INT)) 1318 return (0); 1319 if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) & 1320 NFSINT_SIGMASK)) 1321 return (EINTR); 1322 return (0); 1323 } 1324 1325 /* 1326 * Lock a socket against others. 1327 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply 1328 * and also to avoid race conditions between the processes with nfs requests 1329 * in progress when a reconnect is necessary. 1330 */ 1331 nfs_sndlock(flagp, rep) 1332 register int *flagp; 1333 struct nfsreq *rep; 1334 { 1335 struct proc *p; 1336 1337 if (rep) 1338 p = rep->r_procp; 1339 else 1340 p = (struct proc *)0; 1341 while (*flagp & NFSMNT_SNDLOCK) { 1342 if (nfs_sigintr(rep->r_nmp, rep, p)) 1343 return (EINTR); 1344 *flagp |= NFSMNT_WANTSND; 1345 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0); 1346 } 1347 *flagp |= NFSMNT_SNDLOCK; 1348 return (0); 1349 } 1350 1351 /* 1352 * Unlock the stream socket for others. 1353 */ 1354 void 1355 nfs_sndunlock(flagp) 1356 register int *flagp; 1357 { 1358 1359 if ((*flagp & NFSMNT_SNDLOCK) == 0) 1360 panic("nfs sndunlock"); 1361 *flagp &= ~NFSMNT_SNDLOCK; 1362 if (*flagp & NFSMNT_WANTSND) { 1363 *flagp &= ~NFSMNT_WANTSND; 1364 wakeup((caddr_t)flagp); 1365 } 1366 } 1367 1368 nfs_rcvlock(rep) 1369 register struct nfsreq *rep; 1370 { 1371 register int *flagp = &rep->r_nmp->nm_flag; 1372 1373 while (*flagp & NFSMNT_RCVLOCK) { 1374 if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) 1375 return (EINTR); 1376 *flagp |= NFSMNT_WANTRCV; 1377 (void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0); 1378 } 1379 *flagp |= NFSMNT_RCVLOCK; 1380 return (0); 1381 } 1382 1383 /* 1384 * Unlock the stream socket for others. 1385 */ 1386 void 1387 nfs_rcvunlock(flagp) 1388 register int *flagp; 1389 { 1390 1391 if ((*flagp & NFSMNT_RCVLOCK) == 0) 1392 panic("nfs rcvunlock"); 1393 *flagp &= ~NFSMNT_RCVLOCK; 1394 if (*flagp & NFSMNT_WANTRCV) { 1395 *flagp &= ~NFSMNT_WANTRCV; 1396 wakeup((caddr_t)flagp); 1397 } 1398 } 1399 1400 /* 1401 * This function compares two net addresses by family and returns TRUE 1402 * if they are the same host. 1403 * If there is any doubt, return FALSE. 1404 * The AF_INET family is handled as a special case so that address mbufs 1405 * don't need to be saved to store "struct in_addr", which is only 4 bytes. 1406 */ 1407 nfs_netaddr_match(family, haddr, hmask, nam) 1408 int family; 1409 union nethostaddr *haddr; 1410 union nethostaddr *hmask; 1411 struct mbuf *nam; 1412 { 1413 register struct sockaddr_in *inetaddr; 1414 #ifdef ISO 1415 register struct sockaddr_iso *isoaddr1, *isoaddr2; 1416 #endif 1417 1418 1419 switch (family) { 1420 case AF_INET: 1421 inetaddr = mtod(nam, struct sockaddr_in *); 1422 if (inetaddr->sin_family != AF_INET) 1423 return (0); 1424 if (hmask) { 1425 if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) == 1426 (haddr->had_inetaddr & hmask->had_inetaddr)) 1427 return (1); 1428 } else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr) 1429 return (1); 1430 break; 1431 #ifdef ISO 1432 case AF_ISO: 1433 isoaddr1 = mtod(nam, struct sockaddr_iso *); 1434 if (isoaddr1->siso_family != AF_ISO) 1435 return (0); 1436 isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); 1437 if (isoaddr1->siso_nlen > 0 && 1438 isoaddr1->siso_nlen == isoaddr2->siso_nlen && 1439 SAME_ISOADDR(isoaddr1, isoaddr2)) 1440 return (1); 1441 break; 1442 #endif /* ISO */ 1443 default: 1444 break; 1445 }; 1446 return (0); 1447 } 1448 1449 /* 1450 * Build hash lists of net addresses and hang them off the mount point. 1451 * Called by ufs_mount() to set up the lists of export addresses. 1452 */ 1453 hang_addrlist(mp, argp) 1454 struct mount *mp; 1455 struct ufs_args *argp; 1456 { 1457 register struct netaddrhash *np, **hnp; 1458 register int i; 1459 struct ufsmount *ump; 1460 struct sockaddr *saddr; 1461 struct mbuf *nam, *msk = (struct mbuf *)0; 1462 union nethostaddr netmsk; 1463 int error; 1464 1465 if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen, 1466 MT_SONAME)) 1467 return (error); 1468 saddr = mtod(nam, struct sockaddr *); 1469 ump = VFSTOUFS(mp); 1470 if (saddr->sa_family == AF_INET && 1471 ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) { 1472 m_freem(nam); 1473 if (mp->mnt_flag & MNT_DEFEXPORTED) 1474 return (EPERM); 1475 np = &ump->um_defexported; 1476 np->neth_exflags = argp->exflags; 1477 np->neth_anon = argp->anon; 1478 np->neth_anon.cr_ref = 1; 1479 mp->mnt_flag |= MNT_DEFEXPORTED; 1480 return (0); 1481 } 1482 if (argp->msklen > 0) { 1483 if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen, 1484 MT_SONAME)) { 1485 m_freem(nam); 1486 return (error); 1487 } 1488 1489 /* 1490 * Scan all the hash lists to check against duplications. 1491 * For the net list, try both masks to catch a subnet 1492 * of another network. 1493 */ 1494 hnp = &ump->um_netaddr[NETMASK_HASH]; 1495 np = *hnp; 1496 if (saddr->sa_family == AF_INET) 1497 netmsk.had_inetaddr = 1498 mtod(msk, struct sockaddr_in *)->sin_addr.s_addr; 1499 else 1500 netmsk.had_nam = msk; 1501 while (np) { 1502 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1503 &np->neth_hmask, nam) || 1504 nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1505 &netmsk, nam)) { 1506 m_freem(nam); 1507 m_freem(msk); 1508 return (EPERM); 1509 } 1510 np = np->neth_next; 1511 } 1512 for (i = 0; i < NETHASHSZ; i++) { 1513 np = ump->um_netaddr[i]; 1514 while (np) { 1515 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1516 &netmsk, nam)) { 1517 m_freem(nam); 1518 m_freem(msk); 1519 return (EPERM); 1520 } 1521 np = np->neth_next; 1522 } 1523 } 1524 } else { 1525 hnp = &ump->um_netaddr[NETADDRHASH(saddr)]; 1526 np = ump->um_netaddr[NETMASK_HASH]; 1527 while (np) { 1528 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1529 &np->neth_hmask, nam)) { 1530 m_freem(nam); 1531 return (EPERM); 1532 } 1533 np = np->neth_next; 1534 } 1535 np = *hnp; 1536 while (np) { 1537 if (nfs_netaddr_match(np->neth_family, &np->neth_haddr, 1538 (union nethostaddr *)0, nam)) { 1539 m_freem(nam); 1540 return (EPERM); 1541 } 1542 np = np->neth_next; 1543 } 1544 } 1545 np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR, 1546 M_WAITOK); 1547 np->neth_family = saddr->sa_family; 1548 if (saddr->sa_family == AF_INET) { 1549 np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr; 1550 m_freem(nam); 1551 if (msk) { 1552 np->neth_inetmask = netmsk.had_inetaddr; 1553 m_freem(msk); 1554 if (np->neth_inetaddr &~ np->neth_inetmask) 1555 return (EPERM); 1556 } else 1557 np->neth_inetmask = 0xffffffff; 1558 } else { 1559 np->neth_nam = nam; 1560 np->neth_msk = msk; 1561 } 1562 np->neth_exflags = argp->exflags; 1563 np->neth_anon = argp->anon; 1564 np->neth_anon.cr_ref = 1; 1565 np->neth_next = *hnp; 1566 *hnp = np; 1567 return (0); 1568 } 1569 1570 /* 1571 * Free the net address hash lists that are hanging off the mount points. 1572 */ 1573 free_addrlist(ump) 1574 struct ufsmount *ump; 1575 { 1576 register struct netaddrhash *np, *onp; 1577 register int i; 1578 1579 for (i = 0; i <= NETHASHSZ; i++) { 1580 np = ump->um_netaddr[i]; 1581 ump->um_netaddr[i] = (struct netaddrhash *)0; 1582 while (np) { 1583 onp = np; 1584 np = np->neth_next; 1585 if (onp->neth_family != AF_INET) { 1586 m_freem(onp->neth_nam); 1587 m_freem(onp->neth_msk); 1588 } 1589 free((caddr_t)onp, M_NETADDR); 1590 } 1591 } 1592 } 1593 1594 /* 1595 * Generate a hash code for an iso host address. Used by NETADDRHASH() for 1596 * iso addresses. 1597 */ 1598 iso_addrhash(saddr) 1599 struct sockaddr *saddr; 1600 { 1601 #ifdef ISO 1602 register struct sockaddr_iso *siso; 1603 register int i, sum; 1604 1605 sum = 0; 1606 for (i = 0; i < siso->siso_nlen; i++) 1607 sum += siso->siso_data[i]; 1608 return (sum & (NETHASHSZ - 1)); 1609 #else 1610 return (0); 1611 #endif /* ISO */ 1612 } 1613 1614 /* 1615 * Check for badly aligned mbuf data areas and 1616 * realign data in an mbuf list by copying the data areas up, as required. 1617 */ 1618 void 1619 nfs_realign(m, hsiz) 1620 register struct mbuf *m; 1621 int hsiz; 1622 { 1623 register struct mbuf *m2; 1624 register int siz, mlen, olen; 1625 register caddr_t tcp, fcp; 1626 struct mbuf *mnew; 1627 1628 while (m) { 1629 /* 1630 * This never happens for UDP, rarely happens for TCP 1631 * but frequently happens for iso transport. 1632 */ 1633 if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) { 1634 olen = m->m_len; 1635 fcp = mtod(m, caddr_t); 1636 m->m_flags &= ~M_PKTHDR; 1637 if (m->m_flags & M_EXT) 1638 m->m_data = m->m_ext.ext_buf; 1639 else 1640 m->m_data = m->m_dat; 1641 m->m_len = 0; 1642 tcp = mtod(m, caddr_t); 1643 mnew = m; 1644 m2 = m->m_next; 1645 1646 /* 1647 * If possible, only put the first invariant part 1648 * of the RPC header in the first mbuf. 1649 */ 1650 if (olen <= hsiz) 1651 mlen = hsiz; 1652 else 1653 mlen = M_TRAILINGSPACE(m); 1654 1655 /* 1656 * Loop through the mbuf list consolidating data. 1657 */ 1658 while (m) { 1659 while (olen > 0) { 1660 if (mlen == 0) { 1661 m2->m_flags &= ~M_PKTHDR; 1662 if (m2->m_flags & M_EXT) 1663 m2->m_data = m2->m_ext.ext_buf; 1664 else 1665 m2->m_data = m2->m_dat; 1666 m2->m_len = 0; 1667 mlen = M_TRAILINGSPACE(m2); 1668 tcp = mtod(m2, caddr_t); 1669 mnew = m2; 1670 m2 = m2->m_next; 1671 } 1672 siz = MIN(mlen, olen); 1673 if (tcp != fcp) 1674 bcopy(fcp, tcp, siz); 1675 mnew->m_len += siz; 1676 mlen -= siz; 1677 olen -= siz; 1678 tcp += siz; 1679 fcp += siz; 1680 } 1681 m = m->m_next; 1682 if (m) { 1683 olen = m->m_len; 1684 fcp = mtod(m, caddr_t); 1685 } 1686 } 1687 1688 /* 1689 * Finally, set m_len == 0 for any trailing mbufs that have 1690 * been copied out of. 1691 */ 1692 while (m2) { 1693 m2->m_len = 0; 1694 m2 = m2->m_next; 1695 } 1696 return; 1697 } 1698 m = m->m_next; 1699 } 1700 } 1701 1702 /* 1703 * Socket upcall routine for the nfsd sockets. 1704 * The caddr_t arg is a pointer to the "struct nfssvc_sock". 1705 * Essentially do as much as possible non-blocking, else punt and it will 1706 * be called with M_WAIT from an nfsd. 1707 */ 1708 void 1709 nfsrv_rcv(so, arg, waitflag) 1710 struct socket *so; 1711 caddr_t arg; 1712 int waitflag; 1713 { 1714 register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg; 1715 register struct mbuf *m; 1716 struct mbuf *mp, *nam; 1717 struct uio auio; 1718 int flags, error; 1719 1720 if (so->so_type == SOCK_STREAM) { 1721 /* 1722 * If there are already records on the queue, defer soreceive() 1723 * to an nfsd so that there is feedback to the TCP layer that 1724 * the nfs servers are heavily loaded. 1725 */ 1726 if (slp->ns_rec && waitflag == M_DONTWAIT) { 1727 slp->ns_flag |= SLP_NEEDQ; 1728 nfsrv_wakenfsd(slp); 1729 return; 1730 } 1731 1732 /* 1733 * Do soreceive(). 1734 */ 1735 auio.uio_resid = 1000000000; 1736 flags = MSG_DONTWAIT; 1737 error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags); 1738 if (error || mp == (struct mbuf *)0) { 1739 if (error != EWOULDBLOCK) { 1740 slp->ns_flag |= SLP_DISCONN; 1741 if (waitflag == M_DONTWAIT) 1742 nfsrv_wakenfsd(slp); 1743 } 1744 goto dorecs; 1745 } 1746 m = mp; 1747 if (slp->ns_rawend) { 1748 slp->ns_rawend->m_next = m; 1749 slp->ns_cc += 1000000000 - auio.uio_resid; 1750 } else { 1751 slp->ns_raw = m; 1752 slp->ns_cc = 1000000000 - auio.uio_resid; 1753 } 1754 while (m->m_next) 1755 m = m->m_next; 1756 slp->ns_rawend = m; 1757 1758 /* 1759 * Now try and parse record(s) out of the raw stream data. 1760 */ 1761 if (error = nfsrv_getstream(slp, waitflag)) { 1762 if (error == EPERM) 1763 slp->ns_flag |= SLP_DISCONN; 1764 if (error == EWOULDBLOCK) 1765 slp->ns_flag |= SLP_NEEDQ; 1766 if (waitflag == M_DONTWAIT) 1767 nfsrv_wakenfsd(slp); 1768 } 1769 } else { 1770 do { 1771 auio.uio_resid = 1000000000; 1772 flags = MSG_DONTWAIT; 1773 error = soreceive(so, &nam, &auio, &mp, 1774 (struct mbuf **)0, &flags); 1775 if (mp) { 1776 nfs_realign(mp, 10 * NFSX_UNSIGNED); 1777 if (nam) { 1778 m = nam; 1779 m->m_next = mp; 1780 } else 1781 m = mp; 1782 if (slp->ns_recend) 1783 slp->ns_recend->m_nextpkt = m; 1784 else 1785 slp->ns_rec = m; 1786 slp->ns_recend = m; 1787 m->m_nextpkt = (struct mbuf *)0; 1788 } 1789 if (error) { 1790 if ((so->so_proto->pr_flags & PR_CONNREQUIRED) 1791 && error != EWOULDBLOCK) { 1792 slp->ns_flag |= SLP_DISCONN; 1793 if (waitflag == M_DONTWAIT) 1794 nfsrv_wakenfsd(slp); 1795 } 1796 } 1797 } while (mp); 1798 } 1799 1800 /* 1801 * Now try and process the request records, non-blocking. 1802 */ 1803 dorecs: 1804 if (slp->ns_rec && waitflag == M_DONTWAIT) 1805 nfsrv_wakenfsd(slp); 1806 } 1807 1808 /* 1809 * Try and extract an RPC request from the mbuf data list received on a 1810 * stream socket. The "waitflag" argument indicates whether or not it 1811 * can sleep. 1812 */ 1813 nfsrv_getstream(slp, waitflag) 1814 register struct nfssvc_sock *slp; 1815 int waitflag; 1816 { 1817 register struct mbuf *m; 1818 register char *cp1, *cp2; 1819 register int len; 1820 struct mbuf *om, *m2, *recm; 1821 u_long recmark; 1822 1823 if (slp->ns_flag & SLP_GETSTREAM) 1824 panic("nfs getstream"); 1825 slp->ns_flag |= SLP_GETSTREAM; 1826 for (;;) { 1827 if (slp->ns_reclen == 0) { 1828 if (slp->ns_cc < NFSX_UNSIGNED) { 1829 slp->ns_flag &= ~SLP_GETSTREAM; 1830 return (0); 1831 } 1832 m = slp->ns_raw; 1833 if (m->m_len >= NFSX_UNSIGNED) { 1834 bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED); 1835 m->m_data += NFSX_UNSIGNED; 1836 m->m_len -= NFSX_UNSIGNED; 1837 } else { 1838 cp1 = (caddr_t)&recmark; 1839 cp2 = mtod(m, caddr_t); 1840 while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) { 1841 while (m->m_len == 0) { 1842 m = m->m_next; 1843 cp2 = mtod(m, caddr_t); 1844 } 1845 *cp1++ = *cp2++; 1846 m->m_data++; 1847 m->m_len--; 1848 } 1849 } 1850 slp->ns_cc -= NFSX_UNSIGNED; 1851 slp->ns_reclen = ntohl(recmark) & ~0x80000000; 1852 if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) { 1853 slp->ns_flag &= ~SLP_GETSTREAM; 1854 return (EPERM); 1855 } 1856 } 1857 1858 /* 1859 * Now get the record part. 1860 */ 1861 if (slp->ns_cc == slp->ns_reclen) { 1862 recm = slp->ns_raw; 1863 slp->ns_raw = slp->ns_rawend = (struct mbuf *)0; 1864 slp->ns_cc = slp->ns_reclen = 0; 1865 } else if (slp->ns_cc > slp->ns_reclen) { 1866 len = 0; 1867 m = slp->ns_raw; 1868 om = (struct mbuf *)0; 1869 while (len < slp->ns_reclen) { 1870 if ((len + m->m_len) > slp->ns_reclen) { 1871 m2 = m_copym(m, 0, slp->ns_reclen - len, 1872 waitflag); 1873 if (m2) { 1874 if (om) { 1875 om->m_next = m2; 1876 recm = slp->ns_raw; 1877 } else 1878 recm = m2; 1879 m->m_data += slp->ns_reclen - len; 1880 m->m_len -= slp->ns_reclen - len; 1881 len = slp->ns_reclen; 1882 } else { 1883 slp->ns_flag &= ~SLP_GETSTREAM; 1884 return (EWOULDBLOCK); 1885 } 1886 } else if ((len + m->m_len) == slp->ns_reclen) { 1887 om = m; 1888 len += m->m_len; 1889 m = m->m_next; 1890 recm = slp->ns_raw; 1891 om->m_next = (struct mbuf *)0; 1892 } else { 1893 om = m; 1894 len += m->m_len; 1895 m = m->m_next; 1896 } 1897 } 1898 slp->ns_raw = m; 1899 slp->ns_cc -= len; 1900 slp->ns_reclen = 0; 1901 } else { 1902 slp->ns_flag &= ~SLP_GETSTREAM; 1903 return (0); 1904 } 1905 nfs_realign(recm, 10 * NFSX_UNSIGNED); 1906 if (slp->ns_recend) 1907 slp->ns_recend->m_nextpkt = recm; 1908 else 1909 slp->ns_rec = recm; 1910 slp->ns_recend = recm; 1911 } 1912 } 1913 1914 /* 1915 * Parse an RPC header. 1916 */ 1917 nfsrv_dorec(slp, nd) 1918 register struct nfssvc_sock *slp; 1919 register struct nfsd *nd; 1920 { 1921 register struct mbuf *m; 1922 int error; 1923 1924 if (slp->ns_sref != nd->nd_sref || 1925 (m = slp->ns_rec) == (struct mbuf *)0) 1926 return (ENOBUFS); 1927 if (slp->ns_rec = m->m_nextpkt) 1928 m->m_nextpkt = (struct mbuf *)0; 1929 else 1930 slp->ns_recend = (struct mbuf *)0; 1931 if (m->m_type == MT_SONAME) { 1932 nd->nd_nam = m; 1933 nd->nd_md = nd->nd_mrep = m->m_next; 1934 m->m_next = (struct mbuf *)0; 1935 } else { 1936 nd->nd_nam = (struct mbuf *)0; 1937 nd->nd_md = nd->nd_mrep = m; 1938 } 1939 nd->nd_dpos = mtod(nd->nd_md, caddr_t); 1940 if (error = nfs_getreq(nd, TRUE)) { 1941 m_freem(nd->nd_nam); 1942 return (error); 1943 } 1944 return (0); 1945 } 1946 1947 /* 1948 * Parse an RPC request 1949 * - verify it 1950 * - fill in the cred struct. 1951 */ 1952 nfs_getreq(nd, has_header) 1953 register struct nfsd *nd; 1954 int has_header; 1955 { 1956 register int len, i; 1957 register u_long *tl; 1958 register long t1; 1959 struct uio uio; 1960 struct iovec iov; 1961 caddr_t dpos, cp2; 1962 u_long nfsvers, auth_type; 1963 int error = 0, nqnfs = 0; 1964 struct mbuf *mrep, *md; 1965 1966 mrep = nd->nd_mrep; 1967 md = nd->nd_md; 1968 dpos = nd->nd_dpos; 1969 if (has_header) { 1970 nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED); 1971 nd->nd_retxid = *tl++; 1972 if (*tl++ != rpc_call) { 1973 m_freem(mrep); 1974 return (EBADRPC); 1975 } 1976 } else { 1977 nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED); 1978 } 1979 nd->nd_repstat = 0; 1980 if (*tl++ != rpc_vers) { 1981 nd->nd_repstat = ERPCMISMATCH; 1982 nd->nd_procnum = NFSPROC_NOOP; 1983 return (0); 1984 } 1985 nfsvers = nfs_vers; 1986 if (*tl != nfs_prog) { 1987 if (*tl == nqnfs_prog) { 1988 nqnfs++; 1989 nfsvers = nqnfs_vers; 1990 } else { 1991 nd->nd_repstat = EPROGUNAVAIL; 1992 nd->nd_procnum = NFSPROC_NOOP; 1993 return (0); 1994 } 1995 } 1996 tl++; 1997 if (*tl++ != nfsvers) { 1998 nd->nd_repstat = EPROGMISMATCH; 1999 nd->nd_procnum = NFSPROC_NOOP; 2000 return (0); 2001 } 2002 nd->nd_procnum = fxdr_unsigned(u_long, *tl++); 2003 if (nd->nd_procnum == NFSPROC_NULL) 2004 return (0); 2005 if (nd->nd_procnum >= NFS_NPROCS || 2006 (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) || 2007 (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) { 2008 nd->nd_repstat = EPROCUNAVAIL; 2009 nd->nd_procnum = NFSPROC_NOOP; 2010 return (0); 2011 } 2012 auth_type = *tl++; 2013 len = fxdr_unsigned(int, *tl++); 2014 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2015 m_freem(mrep); 2016 return (EBADRPC); 2017 } 2018 2019 /* 2020 * Handle auth_unix or auth_kerb. 2021 */ 2022 if (auth_type == rpc_auth_unix) { 2023 len = fxdr_unsigned(int, *++tl); 2024 if (len < 0 || len > NFS_MAXNAMLEN) { 2025 m_freem(mrep); 2026 return (EBADRPC); 2027 } 2028 nfsm_adv(nfsm_rndup(len)); 2029 nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); 2030 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2031 nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++); 2032 len = fxdr_unsigned(int, *tl); 2033 if (len < 0 || len > RPCAUTH_UNIXGIDS) { 2034 m_freem(mrep); 2035 return (EBADRPC); 2036 } 2037 nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); 2038 for (i = 1; i <= len; i++) 2039 if (i < NGROUPS) 2040 nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++); 2041 else 2042 tl++; 2043 nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1); 2044 } else if (auth_type == rpc_auth_kerb) { 2045 nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++); 2046 nd->nd_authlen = fxdr_unsigned(int, *tl); 2047 iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen); 2048 if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) { 2049 m_freem(mrep); 2050 return (EBADRPC); 2051 } 2052 uio.uio_offset = 0; 2053 uio.uio_iov = &iov; 2054 uio.uio_iovcnt = 1; 2055 uio.uio_segflg = UIO_SYSSPACE; 2056 iov.iov_base = (caddr_t)nd->nd_authstr; 2057 nfsm_mtouio(&uio, uio.uio_resid); 2058 nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); 2059 nd->nd_flag |= NFSD_NEEDAUTH; 2060 } 2061 2062 /* 2063 * Do we have any use for the verifier. 2064 * According to the "Remote Procedure Call Protocol Spec." it 2065 * should be AUTH_NULL, but some clients make it AUTH_UNIX? 2066 * For now, just skip over it 2067 */ 2068 len = fxdr_unsigned(int, *++tl); 2069 if (len < 0 || len > RPCAUTH_MAXSIZ) { 2070 m_freem(mrep); 2071 return (EBADRPC); 2072 } 2073 if (len > 0) { 2074 nfsm_adv(nfsm_rndup(len)); 2075 } 2076 2077 /* 2078 * For nqnfs, get piggybacked lease request. 2079 */ 2080 if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) { 2081 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2082 nd->nd_nqlflag = fxdr_unsigned(int, *tl); 2083 if (nd->nd_nqlflag) { 2084 nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); 2085 nd->nd_duration = fxdr_unsigned(int, *tl); 2086 } else 2087 nd->nd_duration = NQ_MINLEASE; 2088 } else { 2089 nd->nd_nqlflag = NQL_NOVAL; 2090 nd->nd_duration = NQ_MINLEASE; 2091 } 2092 nd->nd_md = md; 2093 nd->nd_dpos = dpos; 2094 return (0); 2095 nfsmout: 2096 return (error); 2097 } 2098 2099 /* 2100 * Search for a sleeping nfsd and wake it up. 2101 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the 2102 * running nfsds will go look for the work in the nfssvc_sock list. 2103 */ 2104 void 2105 nfsrv_wakenfsd(slp) 2106 struct nfssvc_sock *slp; 2107 { 2108 register struct nfsd *nd = nfsd_head.nd_next; 2109 2110 while (nd != (struct nfsd *)&nfsd_head) { 2111 if (nd->nd_flag & NFSD_WAITING) { 2112 nd->nd_flag &= ~NFSD_WAITING; 2113 if (nd->nd_slp) 2114 panic("nfsd wakeup"); 2115 nd->nd_slp = slp; 2116 nd->nd_sref = slp->ns_sref; 2117 wakeup((caddr_t)nd); 2118 return; 2119 } 2120 nd = nd->nd_next; 2121 } 2122 nfsd_head.nd_flag |= NFSD_CHECKSLP; 2123 } 2124 2125 nfs_msg(p, server, msg) 2126 struct proc *p; 2127 char *server, *msg; 2128 { 2129 tpr_t tpr; 2130 2131 if (p) 2132 tpr = tprintf_open(p); 2133 else 2134 tpr = NULL; 2135 tprintf(tpr, "nfs server %s: %s\n", server, msg); 2136 tprintf_close(tpr); 2137 } 2138