1 /*
2 * Copyright (c) 1989, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95
33 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $
34 */
35
36 #include "opt_bootp.h"
37 #include "opt_nfsroot.h"
38
39 #include <sys/param.h>
40 #include <sys/sockio.h>
41 #include <sys/proc.h>
42 #include <sys/vnode.h>
43 #include <sys/fcntl.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/malloc.h>
47 #include <sys/mount.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/systm.h>
52 #include <sys/objcache.h>
53
54 #include <vm/vm.h>
55 #include <vm/vm_extern.h>
56
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61
62 #include <sys/thread2.h>
63 #include <sys/mutex2.h>
64
65 #include "rpcv2.h"
66 #include "nfsproto.h"
67 #include "nfs.h"
68 #include "nfsmount.h"
69 #include "nfsnode.h"
70 #include "xdr_subs.h"
71 #include "nfsm_subs.h"
72 #include "nfsdiskless.h"
73 #include "nfsmountrpc.h"
74
/*
 * nfs_mountroot() is defined later in this file.  bootpc_init() is only
 * called from nfs_mountroot() when both BOOTP_NFSROOT and BOOTP are
 * configured.
 */
extern int	nfs_mountroot(struct mount *mp);
extern void	bootpc_init(void);

/* Vnode operation vectors (not defined in the visible part of this file). */
extern struct vop_ops	nfsv2_vnode_vops;
extern struct vop_ops	nfsv2_fifo_vops;
extern struct vop_ops	nfsv2_spec_vops;
81
82 MALLOC_DEFINE(M_NFS, "NFS gen", "NFS general");
83 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header");
84 MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle");
85 MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure");
86 MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data");
87 MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor");
88 MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure");
89
90 struct nfsstats nfsstats;
91 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem");
92 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats,
93 "Nfs stats structure");
94 static int nfs_ip_paranoia = 1;
95 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0,
96 "Enable no-connection mode for protocols that support no-connection mode");
97 #ifdef NFS_DEBUG
98 int nfs_debug;
99 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, "");
100 #endif
101
102 /*
103 * Tunable to determine the Read/Write unit size. Maximum value
104 * is NFS_MAXDATA. We also default to NFS_MAXDATA.
105 */
106 static int nfs_io_size = NFS_MAXDATA;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_io_size, CTLFLAG_RW,
108 &nfs_io_size, 0, "NFS optimal I/O unit size");
109
110 static void nfs_decode_args (struct nfsmount *nmp,
111 struct nfs_args *argp);
112 static int mountnfs (struct nfs_args *,struct mount *,
113 struct sockaddr *,char *,char *,struct vnode **);
114 static int nfs_mount ( struct mount *mp, char *path, caddr_t data,
115 struct ucred *cred);
116 static int nfs_unmount ( struct mount *mp, int mntflags);
117 static int nfs_root ( struct mount *mp, struct vnode **vpp);
118 static int nfs_statfs ( struct mount *mp, struct statfs *sbp,
119 struct ucred *cred);
120 static int nfs_statvfs(struct mount *mp, struct statvfs *sbp,
121 struct ucred *cred);
122 static int nfs_sync ( struct mount *mp, int waitfor);
123
124 /*
125 * nfs vfs operations.
126 */
127 static struct vfsops nfs_vfsops = {
128 .vfs_flags = 0,
129 .vfs_mount = nfs_mount,
130 .vfs_unmount = nfs_unmount,
131 .vfs_root = nfs_root,
132 .vfs_statfs = nfs_statfs,
133 .vfs_statvfs = nfs_statvfs,
134 .vfs_sync = nfs_sync,
135 .vfs_init = nfs_init,
136 .vfs_uninit = nfs_uninit
137 };
138 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_MPSAFE);
139 MODULE_VERSION(nfs, 1);
140
141 /*
142 * This structure must be filled in by a primary bootstrap or bootstrap
143 * server for a diskless/dataless machine. It is initialized below just
144 * to ensure that it is allocated to initialized data (.data not .bss).
145 */
146 struct nfs_diskless nfs_diskless = { { { 0 } } };
147 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } };
148 int nfs_diskless_valid = 0;
149
150 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
151 &nfs_diskless_valid, 0,
152 "NFS diskless params were obtained");
153
154 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
155 nfsv3_diskless.root_hostnam, 0,
156 "Host name for mount point");
157
158 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
159 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr,
160 "%Ssockaddr_in", "Address of root server");
161
162 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD,
163 nfsv3_diskless.swap_hostnam, 0,
164 "Host name for mount ppoint");
165
166 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD,
167 &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr,
168 "%Ssockaddr_in", "Address of swap server");
169
170
void		nfsargs_ntoh(struct nfs_args *);

/* Helpers used by the diskless (NFS root) mount path below. */
static int	nfs_mountdiskless(char *path, char *which, int mountflag,
				  struct sockaddr_in *sin,
				  struct nfs_args *args, struct thread *td,
				  struct vnode **vpp, struct mount **mpp);
static void	nfs_convert_diskless(void);
static void	nfs_convert_oargs(struct nfs_args *args,
				  struct onfs_args *oargs);
179
180 /*
181 * Calculate the buffer I/O block size to use. The maximum V2 block size
182 * is typically 8K, the maximum datagram size is typically 16K, and the
183 * maximum V3 block size is typically 32K. The buffer cache tends to work
184 * best with 16K blocks but we allow 32K for TCP connections.
185 *
186 * We force the block size to be at least a page for buffer cache efficiency.
187 */
188 static int
nfs_iosize(int v3,int sotype)189 nfs_iosize(int v3, int sotype)
190 {
191 int iosize;
192 int iomax;
193
194 if (v3) {
195 if (sotype == SOCK_STREAM)
196 iomax = NFS_MAXDATA;
197 else
198 iomax = NFS_MAXDGRAMDATA;
199 } else {
200 iomax = NFS_V2MAXDATA;
201 }
202 if ((iosize = nfs_io_size) > iomax)
203 iosize = iomax;
204 if (iosize < PAGE_SIZE)
205 iosize = PAGE_SIZE;
206
207 /*
208 * This is an aweful hack but until the buffer cache is rewritten
209 * we need it. The problem is that when you combine write() with
210 * mmap() the vm_page->valid bits can become weird looking
211 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers
212 * at the file EOF. To solve the problem the BIO system needs to
213 * be guarenteed that the NFS iosize for regular files will be a
214 * multiple of PAGE_SIZE so it can invalidate the whole page
215 * rather then just the piece of it owned by the buffer when
216 * NFS does vinvalbuf() calls.
217 */
218 if (iosize & PAGE_MASK)
219 iosize = (iosize & ~PAGE_MASK) + PAGE_SIZE;
220 return iosize;
221 }
222
223 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)224 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
225 {
226 args->version = NFS_ARGSVERSION;
227 args->addr = oargs->addr;
228 args->addrlen = oargs->addrlen;
229 args->sotype = oargs->sotype;
230 args->proto = oargs->proto;
231 args->fh = oargs->fh;
232 args->fhsize = oargs->fhsize;
233 args->flags = oargs->flags;
234 args->wsize = oargs->wsize;
235 args->rsize = oargs->rsize;
236 args->readdirsize = oargs->readdirsize;
237 args->timeo = oargs->timeo;
238 args->retrans = oargs->retrans;
239 args->maxgrouplist = oargs->maxgrouplist;
240 args->readahead = oargs->readahead;
241 args->deadthresh = oargs->deadthresh;
242 args->hostname = oargs->hostname;
243 }
244
245 static void
nfs_convert_diskless(void)246 nfs_convert_diskless(void)
247 {
248 int i;
249
250 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
251 sizeof(struct ifaliasreq));
252 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
253 sizeof(struct sockaddr_in));
254 nfs_convert_oargs(&nfsv3_diskless.swap_args, &nfs_diskless.swap_args);
255
256 /*
257 * Copy the NFS handle passed from the diskless code.
258 *
259 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which
260 * will fail utterly with HAMMER due to limitations with NFSv2
261 * directory cookies.
262 */
263 bcopy(nfs_diskless.swap_fh, nfsv3_diskless.swap_fh, NFSX_V2FH);
264 nfsv3_diskless.swap_fhsize = NFSX_V2FH;
265 for (i = NFSX_V2FH - 1; i >= 0; --i) {
266 if (nfs_diskless.swap_fh[i])
267 break;
268 }
269 if (i < 0)
270 nfsv3_diskless.swap_fhsize = 0;
271 nfsv3_diskless.swap_fhsize = 0; /* FORCE DISABLE */
272
273 bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr,
274 sizeof(struct sockaddr_in));
275 bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN);
276 nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks;
277 bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred,
278 sizeof(struct ucred));
279 nfs_convert_oargs(&nfsv3_diskless.root_args, &nfs_diskless.root_args);
280
281 /*
282 * Copy the NFS handle passed from the diskless code.
283 *
284 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which
285 * will fail utterly with HAMMER due to limitations with NFSv2
286 * directory cookies.
287 */
288 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
289 nfsv3_diskless.root_fhsize = NFSX_V2FH;
290 for (i = NFSX_V2FH - 1; i >= 0; --i) {
291 if (nfs_diskless.root_fh[i])
292 break;
293 }
294 if (i < 0)
295 nfsv3_diskless.root_fhsize = 0;
296 nfsv3_diskless.root_fhsize = 0; /* FORCE DISABLE */
297
298 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
299 sizeof(struct sockaddr_in));
300 bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN);
301 nfsv3_diskless.root_time = nfs_diskless.root_time;
302 bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam,
303 MAXHOSTNAMELEN);
304 nfs_diskless_valid = 3;
305 }
306
307 /*
308 * nfs statfs call
309 */
310 int
nfs_statfs(struct mount * mp,struct statfs * sbp,struct ucred * cred)311 nfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
312 {
313 struct vnode *vp;
314 struct nfs_statfs *sfp;
315 struct nfsmount *nmp = VFSTONFS(mp);
316 thread_t td = curthread;
317 int error = 0, retattr;
318 struct nfsnode *np;
319 u_quad_t tquad;
320 struct nfsm_info info;
321
322 info.mrep = NULL;
323 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3);
324
325 lwkt_gettoken(&nmp->nm_token);
326
327 #ifndef nolint
328 sfp = NULL;
329 #endif
330 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
331 if (error) {
332 lwkt_reltoken(&nmp->nm_token);
333 return (error);
334 }
335 vp = NFSTOV(np);
336 /* ignore the passed cred */
337 cred = crget();
338 cred->cr_ngroups = 1;
339 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
340 (void)nfs_fsinfo(nmp, vp, td);
341 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
342 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3));
343 ERROROUT(nfsm_fhtom(&info, vp));
344 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error));
345 if (info.v3) {
346 ERROROUT(nfsm_postop_attr(&info, vp, &retattr,
347 NFS_LATTR_NOSHRINK));
348 }
349 if (error) {
350 if (info.mrep != NULL)
351 m_freem(info.mrep);
352 goto nfsmout;
353 }
354 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3)));
355 sbp->f_flags = nmp->nm_flag;
356
357 if (info.v3) {
358 sbp->f_bsize = NFS_FABLKSIZE;
359 tquad = fxdr_hyper(&sfp->sf_tbytes);
360 sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
361 tquad = fxdr_hyper(&sfp->sf_fbytes);
362 sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
363 tquad = fxdr_hyper(&sfp->sf_abytes);
364 sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE));
365 sbp->f_files = (fxdr_unsigned(int32_t,
366 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff);
367 sbp->f_ffree = (fxdr_unsigned(int32_t,
368 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff);
369 } else {
370 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
371 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
372 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
373 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
374 sbp->f_files = 0;
375 sbp->f_ffree = 0;
376 }
377
378 /*
379 * Some values are pre-set in mnt_stat. Note in particular f_iosize
380 * cannot be changed once the filesystem is mounted as it is used
381 * as the basis for BIOs.
382 */
383 if (sbp != &mp->mnt_stat) {
384 sbp->f_type = mp->mnt_vfc->vfc_typenum;
385 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
386 sbp->f_iosize = mp->mnt_stat.f_iosize;
387 }
388 m_freem(info.mrep);
389 info.mrep = NULL;
390 nfsmout:
391 vput(vp);
392 crfree(cred);
393 lwkt_reltoken(&nmp->nm_token);
394 return (error);
395 }
396
397 static int
nfs_statvfs(struct mount * mp,struct statvfs * sbp,struct ucred * cred)398 nfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred)
399 {
400 struct vnode *vp;
401 struct nfs_statfs *sfp;
402 struct nfsmount *nmp = VFSTONFS(mp);
403 thread_t td = curthread;
404 int error = 0, retattr;
405 struct nfsnode *np;
406 struct nfsm_info info;
407
408 info.mrep = NULL;
409 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3);
410 lwkt_gettoken(&nmp->nm_token);
411
412 #ifndef nolint
413 sfp = NULL;
414 #endif
415 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
416 if (error) {
417 lwkt_reltoken(&nmp->nm_token);
418 return (error);
419 }
420 vp = NFSTOV(np);
421 /* ignore the passed cred */
422 cred = crget();
423 cred->cr_ngroups = 1;
424 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
425 (void)nfs_fsinfo(nmp, vp, td);
426 nfsstats.rpccnt[NFSPROC_FSSTAT]++;
427 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3));
428 ERROROUT(nfsm_fhtom(&info, vp));
429 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error));
430 if (info.v3) {
431 ERROROUT(nfsm_postop_attr(&info, vp, &retattr,
432 NFS_LATTR_NOSHRINK));
433 }
434 if (error) {
435 if (info.mrep != NULL)
436 m_freem(info.mrep);
437 goto nfsmout;
438 }
439 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3)));
440 sbp->f_flag = nmp->nm_flag;
441 sbp->f_owner = nmp->nm_cred->cr_ruid;
442
443 if (info.v3) {
444 sbp->f_bsize = NFS_FABLKSIZE;
445 sbp->f_frsize = NFS_FABLKSIZE;
446 sbp->f_blocks = (fxdr_hyper(&sfp->sf_tbytes) /
447 ((u_quad_t)NFS_FABLKSIZE));
448 sbp->f_bfree = (fxdr_hyper(&sfp->sf_fbytes) /
449 ((u_quad_t)NFS_FABLKSIZE));
450 sbp->f_bavail = (fxdr_hyper(&sfp->sf_abytes) /
451 ((u_quad_t)NFS_FABLKSIZE));
452 sbp->f_files = fxdr_hyper(&sfp->sf_tfiles);
453 sbp->f_ffree = fxdr_hyper(&sfp->sf_ffiles);
454 sbp->f_favail = fxdr_hyper(&sfp->sf_afiles);
455 } else {
456 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize);
457 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks);
458 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree);
459 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail);
460 sbp->f_files = 0;
461 sbp->f_ffree = 0;
462 sbp->f_favail = 0;
463 }
464 sbp->f_syncreads = 0;
465 sbp->f_syncwrites = 0;
466 sbp->f_asyncreads = 0;
467 sbp->f_asyncwrites = 0;
468 sbp->f_type = mp->mnt_vfc->vfc_typenum;
469
470 m_freem(info.mrep);
471 info.mrep = NULL;
472 nfsmout:
473 vput(vp);
474 crfree(cred);
475 lwkt_reltoken(&nmp->nm_token);
476 return (error);
477 }
478
479 /*
480 * nfs version 3 fsinfo rpc call
481 */
482 int
nfs_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct thread * td)483 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct thread *td)
484 {
485 struct nfsv3_fsinfo *fsp;
486 u_int32_t pref, max;
487 int error = 0, retattr;
488 u_int64_t maxfsize;
489 struct nfsm_info info;
490
491 info.v3 = 1;
492 nfsstats.rpccnt[NFSPROC_FSINFO]++;
493 nfsm_reqhead(&info, vp, NFSPROC_FSINFO, NFSX_FH(1));
494 ERROROUT(nfsm_fhtom(&info, vp));
495 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSINFO, td,
496 nfs_vpcred(vp, ND_READ), &error));
497 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, NFS_LATTR_NOSHRINK));
498 if (error == 0) {
499 NULLOUT(fsp = nfsm_dissect(&info, NFSX_V3FSINFO));
500 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref);
501 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE)
502 nmp->nm_wsize = roundup2(pref, NFS_FABLKSIZE);
503 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax);
504 if (max < nmp->nm_wsize && max > 0) {
505 nmp->nm_wsize = rounddown2(max, NFS_FABLKSIZE);
506 if (nmp->nm_wsize == 0)
507 nmp->nm_wsize = max;
508 }
509 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref);
510 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE)
511 nmp->nm_rsize = roundup2(pref, NFS_FABLKSIZE);
512 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax);
513 if (max < nmp->nm_rsize && max > 0) {
514 nmp->nm_rsize = rounddown2(max, NFS_FABLKSIZE);
515 if (nmp->nm_rsize == 0)
516 nmp->nm_rsize = max;
517 }
518 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref);
519 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ)
520 nmp->nm_readdirsize = roundup2(pref, NFS_DIRBLKSIZ);
521 if (max < nmp->nm_readdirsize && max > 0) {
522 nmp->nm_readdirsize = rounddown2(max, NFS_DIRBLKSIZ);
523 if (nmp->nm_readdirsize == 0)
524 nmp->nm_readdirsize = max;
525 }
526 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize);
527 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize)
528 nmp->nm_maxfilesize = maxfsize;
529 nmp->nm_state |= NFSSTA_GOTFSINFO;
530
531 /*
532 * Use the smaller of rsize/wsize for the biosize.
533 */
534 if (nmp->nm_rsize < nmp->nm_wsize)
535 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_rsize;
536 else
537 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_wsize;
538 }
539 m_freem(info.mrep);
540 info.mrep = NULL;
541 nfsmout:
542 return (error);
543 }
544
545 /*
546 * Mount a remote root fs via. nfs. This depends on the info in the
547 * nfs_diskless structure that has been filled in properly by some primary
548 * bootstrap.
549 * It goes something like this:
550 * - do enough of "ifconfig" by calling ifioctl() so that the system
551 * can talk to the server
552 * - If nfs_diskless.mygateway is filled in, use that address as
553 * a default gateway.
554 * - build the rootfs mount point and call mountnfs() to do the rest.
555 */
556 int
nfs_mountroot(struct mount * mp)557 nfs_mountroot(struct mount *mp)
558 {
559 struct mount *swap_mp;
560 struct nfsv3_diskless *nd = &nfsv3_diskless;
561 struct socket *so;
562 struct vnode *vp;
563 struct thread *td = curthread; /* XXX */
564 int error, i;
565 u_long l;
566 char buf[128], addr[INET_ADDRSTRLEN];
567
568 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
569 bootpc_init(); /* use bootp to get nfs_diskless filled in */
570 #endif
571
572 /*
573 * XXX time must be non-zero when we init the interface or else
574 * the arp code will wedge...
575 */
576 while (mycpu->gd_time_seconds == 0)
577 tsleep(mycpu, 0, "arpkludge", 10);
578
579 /*
580 * The boot code may have passed us a diskless structure.
581 */
582 kprintf("DISKLESS %d\n", nfs_diskless_valid);
583 if (nfs_diskless_valid == 1)
584 nfs_convert_diskless();
585
586 /*
587 * NFSv3 is required.
588 */
589 nd->root_args.flags |= NFSMNT_NFSV3 | NFSMNT_RDIRPLUS;
590 nd->swap_args.flags |= NFSMNT_NFSV3;
591
592 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr))
593 kprintf("nfs_mountroot: interface %s ip %s",
594 nd->myif.ifra_name,
595 kinet_ntoa(SINP(&nd->myif.ifra_addr)->sin_addr, addr));
596 kprintf(" bcast %s",
597 kinet_ntoa(SINP(&nd->myif.ifra_broadaddr)->sin_addr, addr));
598 kprintf(" mask %s\n",
599 kinet_ntoa(SINP(&nd->myif.ifra_mask)->sin_addr, addr));
600 #undef SINP
601
602 /*
603 * XXX splnet, so networks will receive...
604 */
605 crit_enter();
606
607 /*
608 * BOOTP does not necessarily have to be compiled into the kernel
609 * for an NFS root to work. If we inherited the network
610 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured
611 * out our interface for us and all we need to do is ifconfig the
612 * interface. We only do this if the interface has not already been
613 * ifconfig'd by e.g. BOOTP.
614 */
615 error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, td);
616 if (error) {
617 panic("nfs_mountroot: socreate(%04x): %d",
618 nd->myif.ifra_addr.sa_family, error);
619 }
620
621 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, proc0.p_ucred);
622 if (error)
623 panic("nfs_mountroot: SIOCAIFADDR: %d", error);
624
625 soclose(so, FNONBLOCK);
626
627 /*
628 * If the gateway field is filled in, set it as the default route.
629 */
630 if (nd->mygateway.sin_len != 0) {
631 struct sockaddr_in mask, sin;
632
633 bzero((caddr_t)&mask, sizeof(mask));
634 sin = mask;
635 sin.sin_family = AF_INET;
636 sin.sin_len = sizeof(sin);
637 kprintf("nfs_mountroot: gateway %s\n",
638 kinet_ntoa(nd->mygateway.sin_addr, addr));
639 error = rtrequest_global(RTM_ADD, (struct sockaddr *)&sin,
640 (struct sockaddr *)&nd->mygateway,
641 (struct sockaddr *)&mask,
642 RTF_UP | RTF_GATEWAY);
643 if (error)
644 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error);
645 }
646
647 /*
648 * Create the rootfs mount point.
649 */
650 nd->root_args.fh = nd->root_fh;
651 nd->root_args.fhsize = nd->root_fhsize;
652 l = ntohl(nd->root_saddr.sin_addr.s_addr);
653 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
654 (l >> 24) & 0xff, (l >> 16) & 0xff,
655 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam);
656 kprintf("NFS_ROOT: %s\n",buf);
657 error = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr,
658 &nd->root_args, td, &vp, &mp);
659 if (error) {
660 mp->mnt_vfc->vfc_refcount--;
661 crit_exit();
662 return (error);
663 }
664
665 swap_mp = NULL;
666 if (nd->swap_nblks) {
667
668 /* Convert to DEV_BSIZE instead of Kilobyte */
669 nd->swap_nblks *= 2;
670
671 /*
672 * Create a fake mount point just for the swap vnode so that the
673 * swap file can be on a different server from the rootfs.
674 */
675 nd->swap_args.fh = nd->swap_fh;
676 nd->swap_args.fhsize = nd->swap_fhsize;
677 l = ntohl(nd->swap_saddr.sin_addr.s_addr);
678 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
679 (l >> 24) & 0xff, (l >> 16) & 0xff,
680 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam);
681 kprintf("NFS SWAP: %s\n",buf);
682 vp = NULL; /* avoid gcc warnings */
683 error = nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr,
684 &nd->swap_args, td, &vp, &swap_mp);
685 if (error) {
686 crit_exit();
687 return (error);
688 }
689 vfs_unbusy(swap_mp);
690
691 VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size =
692 nd->swap_nblks * DEV_BSIZE ;
693
694 /*
695 * Since the swap file is not the root dir of a file system,
696 * hack it to a regular file.
697 */
698 vclrflags(vp, VROOT);
699 vref(vp);
700 nfs_setvtype(vp, VREG);
701 swaponvp(td, vp, nd->swap_nblks);
702 }
703
704 mp->mnt_flag |= MNT_ROOTFS;
705
706 /*
707 * This is not really an nfs issue, but it is much easier to
708 * set hostname here and then let the "/etc/rc.xxx" files
709 * mount the right /var based upon its preset value.
710 */
711 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
712 hostname[MAXHOSTNAMELEN - 1] = '\0';
713 for (i = 0; i < MAXHOSTNAMELEN; i++)
714 if (hostname[i] == '\0')
715 break;
716 inittodr(ntohl(nd->root_time));
717 crit_exit();
718 return (0);
719 }
720
721 /*
722 * Internal version of mount system call for diskless setup.
723 */
724 static int
nfs_mountdiskless(char * path,char * which,int mountflag,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount ** mpp)725 nfs_mountdiskless(char *path, char *which, int mountflag,
726 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
727 struct vnode **vpp, struct mount **mpp)
728 {
729 struct mount *mp;
730 struct sockaddr *nam;
731 int didalloc = 0;
732 int error;
733
734 mp = *mpp;
735
736 if (mp == NULL) {
737 if ((error = vfs_rootmountalloc("nfs", path, &mp)) != 0) {
738 kprintf("nfs_mountroot: NFS not configured");
739 return (error);
740 }
741 didalloc = 1;
742 }
743 mp->mnt_kern_flag = 0;
744 mp->mnt_flag = mountflag;
745 nam = dup_sockaddr((struct sockaddr *)sin);
746
747 #if defined(BOOTP) || defined(NFS_ROOT)
748 if (args->fhsize == 0) {
749 char *xpath = path;
750
751 kprintf("NFS_ROOT: No FH passed from loader, attempting "
752 "mount rpc...");
753 while (*xpath && *xpath != ':')
754 ++xpath;
755 if (*xpath)
756 ++xpath;
757 args->fhsize = 0;
758 error = md_mount(sin, xpath, args->fh, &args->fhsize, args, td);
759 if (error) {
760 kprintf("failed error %d.\n", error);
761 goto haderror;
762 }
763 kprintf("success!\n");
764 }
765 #endif
766
767 if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) {
768 #if defined(BOOTP) || defined(NFS_ROOT)
769 haderror:
770 #endif
771 kprintf("nfs_mountroot: mount %s on %s: %d", path, which, error);
772 mp->mnt_vfc->vfc_refcount--;
773 if (didalloc)
774 kfree(mp, M_MOUNT);
775 kfree(nam, M_SONAME);
776 return (error);
777 }
778 *mpp = mp;
779 return (0);
780 }
781
782 static void
nfs_decode_args(struct nfsmount * nmp,struct nfs_args * argp)783 nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp)
784 {
785 int adjsock;
786 int maxio;
787
788 crit_enter();
789 /*
790 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
791 * no sense in that context.
792 */
793 if (nmp->nm_sotype == SOCK_STREAM) {
794 nmp->nm_flag &= ~NFSMNT_NOCONN;
795 argp->flags &= ~NFSMNT_NOCONN;
796 }
797
798 /*
799 * readdirplus is NFSv3 only.
800 */
801 if ((argp->flags & NFSMNT_NFSV3) == 0) {
802 nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
803 argp->flags &= ~NFSMNT_RDIRPLUS;
804 }
805
806 /*
807 * Re-bind if rsrvd port flag has changed
808 */
809 adjsock = (nmp->nm_flag & NFSMNT_RESVPORT) !=
810 (argp->flags & NFSMNT_RESVPORT);
811
812 /* Update flags atomically. Don't change the lock bits. */
813 nmp->nm_flag = argp->flags | nmp->nm_flag;
814 crit_exit();
815
816 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
817 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
818 if (nmp->nm_timeo < NFS_MINTIMEO)
819 nmp->nm_timeo = NFS_MINTIMEO;
820 else if (nmp->nm_timeo > NFS_MAXTIMEO)
821 nmp->nm_timeo = NFS_MAXTIMEO;
822 }
823
824 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
825 nmp->nm_retry = argp->retrans;
826 if (nmp->nm_retry > NFS_MAXREXMIT)
827 nmp->nm_retry = NFS_MAXREXMIT;
828 }
829
830 /*
831 * These parameters effect the buffer cache and cannot be changed
832 * once we've successfully mounted.
833 */
834 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
835 maxio = nfs_iosize(argp->flags & NFSMNT_NFSV3, nmp->nm_sotype);
836
837 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
838 nmp->nm_wsize = argp->wsize;
839 /* Round down to multiple of blocksize */
840 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1);
841 if (nmp->nm_wsize <= 0)
842 nmp->nm_wsize = NFS_FABLKSIZE;
843 }
844 if (nmp->nm_wsize > maxio)
845 nmp->nm_wsize = maxio;
846 if (nmp->nm_wsize > MAXBSIZE)
847 nmp->nm_wsize = MAXBSIZE;
848
849 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
850 nmp->nm_rsize = argp->rsize;
851 /* Round down to multiple of blocksize */
852 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1);
853 if (nmp->nm_rsize <= 0)
854 nmp->nm_rsize = NFS_FABLKSIZE;
855 }
856 if (nmp->nm_rsize > maxio)
857 nmp->nm_rsize = maxio;
858 if (nmp->nm_rsize > MAXBSIZE)
859 nmp->nm_rsize = MAXBSIZE;
860
861 if ((argp->flags & NFSMNT_READDIRSIZE) &&
862 argp->readdirsize > 0) {
863 nmp->nm_readdirsize = argp->readdirsize;
864 }
865 if (nmp->nm_readdirsize > maxio)
866 nmp->nm_readdirsize = maxio;
867 if (nmp->nm_readdirsize > nmp->nm_rsize)
868 nmp->nm_readdirsize = nmp->nm_rsize;
869 }
870
871 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
872 nmp->nm_acregmin = argp->acregmin;
873 else
874 nmp->nm_acregmin = NFS_MINATTRTIMO;
875 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
876 nmp->nm_acregmax = argp->acregmax;
877 else
878 nmp->nm_acregmax = NFS_MAXATTRTIMO;
879 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
880 nmp->nm_acdirmin = argp->acdirmin;
881 else
882 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
883 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
884 nmp->nm_acdirmax = argp->acdirmax;
885 else
886 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
887 if (nmp->nm_acdirmin > nmp->nm_acdirmax)
888 nmp->nm_acdirmin = nmp->nm_acdirmax;
889 if (nmp->nm_acregmin > nmp->nm_acregmax)
890 nmp->nm_acregmin = nmp->nm_acregmax;
891
892 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
893 if (argp->maxgrouplist <= NFS_MAXGRPS)
894 nmp->nm_numgrps = argp->maxgrouplist;
895 else
896 nmp->nm_numgrps = NFS_MAXGRPS;
897 }
898 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
899 if (argp->readahead <= NFS_MAXRAHEAD)
900 nmp->nm_readahead = argp->readahead;
901 else
902 nmp->nm_readahead = NFS_MAXRAHEAD;
903 }
904 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
905 if (argp->deadthresh <= NFS_NEVERDEAD)
906 nmp->nm_deadthresh = argp->deadthresh;
907 else
908 nmp->nm_deadthresh = NFS_NEVERDEAD;
909 }
910
911 if (nmp->nm_so && adjsock) {
912 nfs_safedisconnect(nmp);
913 if (nmp->nm_sotype == SOCK_DGRAM)
914 while (nfs_connect(nmp, NULL)) {
915 kprintf("nfs_args: retrying connect\n");
916 (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0);
917 }
918 }
919 }
920
921 /*
922 * VFS Operations.
923 *
924 * mount system call
925 * It seems a bit dumb to copyinstr() the host and path here and then
926 * bcopy() them in mountnfs(), but I wanted to detect errors before
927 * doing the sockargs() call because sockargs() allocates an mbuf and
928 * an error after that means that I have to release the mbuf.
929 */
930 /* ARGSUSED */
931 static int
nfs_mount(struct mount * mp,char * path,caddr_t data,struct ucred * cred)932 nfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
933 {
934 int error;
935 struct nfs_args args;
936 struct sockaddr *nam;
937 struct vnode *vp;
938 char pth[MNAMELEN], hst[MNAMELEN];
939 size_t len;
940 u_char nfh[NFSX_V3FHMAX];
941
942 if (path == NULL) {
943 nfs_mountroot(mp);
944 return (0);
945 }
946 error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
947 if (error)
948 return (error);
949 if (args.version != NFS_ARGSVERSION) {
950 #ifdef COMPAT_PRELITE2
951 /*
952 * If the argument version is unknown, then assume the
953 * caller is a pre-lite2 4.4BSD client and convert its
954 * arguments.
955 */
956 struct onfs_args oargs;
957 error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args));
958 if (error)
959 return (error);
960 nfs_convert_oargs(&args,&oargs);
961 #else /* !COMPAT_PRELITE2 */
962 return (EPROGMISMATCH);
963 #endif /* COMPAT_PRELITE2 */
964 }
965 if (mp->mnt_flag & MNT_UPDATE) {
966 struct nfsmount *nmp = VFSTONFS(mp);
967
968 if (nmp == NULL)
969 return (EIO);
970 /*
971 * When doing an update, we can't change from or to
972 * v3, or change cookie translation, or rsize or wsize.
973 */
974 args.flags &= ~(NFSMNT_NFSV3 | NFSMNT_RSIZE | NFSMNT_WSIZE);
975 args.flags |= nmp->nm_flag & (NFSMNT_NFSV3);
976 nfs_decode_args(nmp, &args);
977 return (0);
978 }
979
980 /*
981 * Make the nfs_ip_paranoia sysctl serve as the default connection
982 * or no-connection mode for those protocols that support
983 * no-connection mode (the flag will be cleared later for protocols
984 * that do not support no-connection mode). This will allow a client
985 * to receive replies from a different IP then the request was
986 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid),
987 * not 0.
988 */
989 if (nfs_ip_paranoia == 0)
990 args.flags |= NFSMNT_NOCONN;
991 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX)
992 return (EINVAL);
993 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize);
994 if (error)
995 return (error);
996 error = copyinstr(path, pth, MNAMELEN-1, &len);
997 if (error)
998 return (error);
999 bzero(&pth[len], MNAMELEN - len);
1000 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len);
1001 if (error)
1002 return (error);
1003 bzero(&hst[len], MNAMELEN - len);
1004 /* sockargs() call must be after above copyin() calls */
1005 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen);
1006 if (error)
1007 return (error);
1008 args.fh = nfh;
1009 error = mountnfs(&args, mp, nam, pth, hst, &vp);
1010 return (error);
1011 }
1012
1013 /*
1014 * Common code for mount and mountroot
1015 */
1016 static int
mountnfs(struct nfs_args * argp,struct mount * mp,struct sockaddr * nam,char * pth,char * hst,struct vnode ** vpp)1017 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1018 char *pth, char *hst, struct vnode **vpp)
1019 {
1020 struct nfsmount *nmp;
1021 struct nfsnode *np;
1022 int error;
1023 int rxcpu;
1024 int txcpu;
1025
1026 if (mp->mnt_flag & MNT_UPDATE) {
1027 nmp = VFSTONFS(mp);
1028 /* update paths, file handles, etc, here XXX */
1029 kfree(nam, M_SONAME);
1030 return (0);
1031 } else {
1032 nmp = kmalloc(sizeof(*nmp), M_NFS, M_WAITOK|M_ZERO);
1033 mtx_init_flags(&nmp->nm_rxlock, "nfsrx", MTXF_NOCOLLSTATS);
1034 mtx_init_flags(&nmp->nm_txlock, "nfstx", MTXF_NOCOLLSTATS);
1035 TAILQ_INIT(&nmp->nm_uidlruhead);
1036 TAILQ_INIT(&nmp->nm_bioq);
1037 TAILQ_INIT(&nmp->nm_reqq);
1038 TAILQ_INIT(&nmp->nm_reqtxq);
1039 TAILQ_INIT(&nmp->nm_reqrxq);
1040 mp->mnt_data = (qaddr_t)nmp;
1041 lwkt_token_init(&nmp->nm_token, "nfs_token");
1042 kmalloc_create_obj(&nmp->nm_mnode, "NFS inodes",
1043 sizeof(struct nfsnode));
1044 }
1045 vfs_getnewfsid(mp);
1046 nmp->nm_mountp = mp;
1047 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
1048 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */
1049
1050 lwkt_gettoken(&nmp->nm_token);
1051
1052 /*
1053 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too
1054 * high, depending on whether we end up with negative offsets in
1055 * the client or server somewhere. 2GB-1 may be safer.
1056 *
1057 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum
1058 * that we can handle until we find out otherwise. Note that seek
1059 * offsets are signed.
1060 */
1061 if ((argp->flags & NFSMNT_NFSV3) == 0)
1062 nmp->nm_maxfilesize = 0xffffffffLL;
1063 else
1064 nmp->nm_maxfilesize = 0x7fffffffffffffffLL;
1065
1066 nmp->nm_timeo = NFS_TIMEO;
1067 nmp->nm_retry = NFS_RETRANS;
1068 nmp->nm_wsize = nfs_iosize(argp->flags & NFSMNT_NFSV3, argp->sotype);
1069 nmp->nm_rsize = nmp->nm_wsize;
1070 nmp->nm_readdirsize = NFS_READDIRSIZE;
1071 nmp->nm_numgrps = NFS_MAXGRPS;
1072 nmp->nm_readahead = NFS_DEFRAHEAD;
1073 nmp->nm_deadthresh = NFS_DEADTHRESH;
1074 nmp->nm_fhsize = argp->fhsize;
1075 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1076 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1077 nmp->nm_nam = nam;
1078 /* Set up the sockets and per-host congestion */
1079 nmp->nm_sotype = argp->sotype;
1080 nmp->nm_soproto = argp->proto;
1081 nmp->nm_cred = crhold(proc0.p_ucred);
1082
1083 nfs_decode_args(nmp, argp);
1084
1085 /*
1086 * For Connection based sockets (TCP,...) defer the connect until
1087 * the first request, in case the server is not responding.
1088 */
1089 if (nmp->nm_sotype == SOCK_DGRAM &&
1090 (error = nfs_connect(nmp, NULL)))
1091 goto bad;
1092
1093 /*
1094 * This is silly, but it has to be set so that vinifod() works.
1095 * We do not want to do an nfs_statfs() here since we can get
1096 * stuck on a dead server and we are holding a lock on the mount
1097 * point.
1098 */
1099 mp->mnt_stat.f_iosize =
1100 nfs_iosize(nmp->nm_flag & NFSMNT_NFSV3, nmp->nm_sotype);
1101
1102 /*
1103 * Install vop_ops for our vnops
1104 */
1105 vfs_add_vnodeops(mp, &nfsv2_vnode_vops, &mp->mnt_vn_norm_ops);
1106 vfs_add_vnodeops(mp, &nfsv2_spec_vops, &mp->mnt_vn_spec_ops);
1107 vfs_add_vnodeops(mp, &nfsv2_fifo_vops, &mp->mnt_vn_fifo_ops);
1108
1109 /*
1110 * A reference count is needed on the nfsnode representing the
1111 * remote root. If this object is not persistent, then backward
1112 * traversals of the mount point (i.e. "..") will not work if
1113 * the nfsnode gets flushed out of the cache. Ufs does not have
1114 * this problem, because one can identify root inodes by their
1115 * number == UFS_ROOTINO (2).
1116 */
1117 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
1118 if (error)
1119 goto bad;
1120 *vpp = NFSTOV(np);
1121
1122 /*
1123 * Retrieval of mountpoint attributes is delayed until nfs_rot
1124 * or nfs_statfs are first called. This will happen either when
1125 * we first traverse the mount point or if somebody does a df(1).
1126 *
1127 * NFSSTA_GOTFSINFO is used to flag if we have successfully
1128 * retrieved mountpoint attributes. In the case of NFSv3 we
1129 * also flag static fsinfo.
1130 */
1131 if (*vpp != NULL)
1132 (*vpp)->v_type = VNON;
1133
1134 /*
1135 * Lose the lock but keep the ref.
1136 */
1137 vn_unlock(*vpp);
1138 lwkt_gettoken(&nfs_token);
1139 TAILQ_INSERT_TAIL(&nfs_mountq, nmp, nm_entry);
1140 lwkt_reltoken(&nfs_token);
1141
1142 switch(ncpus) {
1143 case 0:
1144 case 1:
1145 rxcpu = 0;
1146 txcpu = 0;
1147 break;
1148 case 2:
1149 rxcpu = 0;
1150 txcpu = 1;
1151 break;
1152 default:
1153 rxcpu = -1;
1154 txcpu = -1;
1155 break;
1156 }
1157
1158 /*
1159 * Start the reader and writer threads.
1160 */
1161 lwkt_create(nfssvc_iod_reader, nmp, &nmp->nm_rxthread,
1162 NULL, 0, rxcpu, "nfsiod_rx");
1163 lwkt_create(nfssvc_iod_writer, nmp, &nmp->nm_txthread,
1164 NULL, 0, txcpu, "nfsiod_tx");
1165 lwkt_reltoken(&nmp->nm_token);
1166 return (0);
1167 bad:
1168 nfs_disconnect(nmp);
1169 lwkt_reltoken(&nmp->nm_token);
1170 nfs_free_mount(nmp);
1171 return (error);
1172 }
1173
1174 /*
1175 * unmount system call
1176 */
1177 static int
nfs_unmount(struct mount * mp,int mntflags)1178 nfs_unmount(struct mount *mp, int mntflags)
1179 {
1180 struct nfsmount *nmp;
1181 int error, flags = 0;
1182
1183 nmp = VFSTONFS(mp);
1184 lwkt_gettoken(&nmp->nm_token);
1185 if (mntflags & MNT_FORCE) {
1186 flags |= FORCECLOSE;
1187 nmp->nm_flag |= NFSMNT_FORCE;
1188 }
1189
1190 /*
1191 * Goes something like this..
1192 * - Call vflush() to clear out vnodes for this file system
1193 * - Close the socket
1194 * - Free up the data structures
1195 */
1196 /* In the forced case, cancel any outstanding requests. */
1197 if (flags & FORCECLOSE) {
1198 error = nfs_nmcancelreqs(nmp);
1199 if (error) {
1200 kprintf("NFS: %s: Unable to cancel all requests\n",
1201 mp->mnt_stat.f_mntfromname);
1202 /* continue anyway */
1203 }
1204 }
1205
1206 /*
1207 * Must handshake with nfs_clientd() if it is active. XXX
1208 */
1209 nmp->nm_state |= NFSSTA_DISMINPROG;
1210
1211 /*
1212 * We hold 1 extra ref on the root vnode; see comment in mountnfs().
1213 *
1214 * If this doesn't work and we are doing a forced unmount we continue
1215 * anyway.
1216 */
1217 error = vflush(mp, 1, flags);
1218 if (error) {
1219 nmp->nm_state &= ~NFSSTA_DISMINPROG;
1220 if ((flags & FORCECLOSE) == 0) {
1221 lwkt_reltoken(&nmp->nm_token);
1222 return (error);
1223 }
1224 }
1225
1226 /*
1227 * We are now committed to the unmount.
1228 * For NQNFS, let the server daemon free the nfsmount structure.
1229 */
1230 if (nmp->nm_flag & NFSMNT_KERB)
1231 nmp->nm_state |= NFSSTA_DISMNT;
1232 nfssvc_iod_stop1(nmp);
1233 nfs_disconnect(nmp);
1234 nfssvc_iod_stop2(nmp);
1235
1236 lwkt_gettoken(&nfs_token);
1237 TAILQ_REMOVE(&nfs_mountq, nmp, nm_entry);
1238 lwkt_reltoken(&nfs_token);
1239
1240 lwkt_reltoken(&nmp->nm_token);
1241
1242 if ((nmp->nm_flag & NFSMNT_KERB) == 0) {
1243 nfs_free_mount(nmp);
1244 }
1245 return (0);
1246 }
1247
1248 void
nfs_free_mount(struct nfsmount * nmp)1249 nfs_free_mount(struct nfsmount *nmp)
1250 {
1251 if (nmp->nm_cred) {
1252 crfree(nmp->nm_cred);
1253 nmp->nm_cred = NULL;
1254 }
1255 if (nmp->nm_nam) {
1256 kfree(nmp->nm_nam, M_SONAME);
1257 nmp->nm_nam = NULL;
1258 }
1259 if (nmp->nm_mnode_obj)
1260 kmalloc_destroy_obj(&nmp->nm_mnode);
1261 kfree(nmp, M_NFS);
1262 }
1263
1264 /*
1265 * Return root of a filesystem
1266 */
1267 static int
nfs_root(struct mount * mp,struct vnode ** vpp)1268 nfs_root(struct mount *mp, struct vnode **vpp)
1269 {
1270 struct vnode *vp;
1271 struct nfsmount *nmp;
1272 struct vattr attrs;
1273 struct nfsnode *np;
1274 int error;
1275
1276 nmp = VFSTONFS(mp);
1277 lwkt_gettoken(&nmp->nm_token);
1278 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL);
1279 if (error) {
1280 lwkt_reltoken(&nmp->nm_token);
1281 return (error);
1282 }
1283 vp = NFSTOV(np);
1284
1285 /*
1286 * Get transfer parameters and root vnode attributes
1287 *
1288 * NOTE: nfs_fsinfo() is expected to override the default
1289 * f_iosize we set.
1290 */
1291 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
1292 if (nmp->nm_flag & NFSMNT_NFSV3) {
1293 mp->mnt_stat.f_iosize = nfs_iosize(1, nmp->nm_sotype);
1294 error = nfs_fsinfo(nmp, vp, curthread);
1295 } else {
1296 if ((error = VOP_GETATTR(vp, &attrs)) == 0)
1297 nmp->nm_state |= NFSSTA_GOTFSINFO;
1298
1299 }
1300 } else {
1301 /*
1302 * The root vnode is usually cached by the namecache so do not
1303 * try to avoid going over the wire even if we have previous
1304 * information cached. A stale NFS mount can loop
1305 * forever resolving the root vnode if we return no-error when
1306 * there is in fact an error.
1307 */
1308 np->n_attrstamp = 0;
1309 error = VOP_GETATTR(vp, &attrs);
1310 }
1311 if (vp->v_type == VNON)
1312 nfs_setvtype(vp, VDIR);
1313 vsetflags(vp, VROOT);
1314 if (error)
1315 vput(vp);
1316 else
1317 *vpp = vp;
1318 lwkt_reltoken(&nmp->nm_token);
1319 return (error);
1320 }
1321
/*
 * State handed to the per-vnode callback of the sync scan.
 */
struct scaninfo {
	int rescan;	/* nfs_sync() repeats the scan while non-zero */
	int waitfor;	/* wait mode passed through to VOP_FSYNC() */
	int allerror;	/* latest non-zero VOP_FSYNC() result, else 0 */
};

static int nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data);
1329
1330 /*
1331 * Flush out the buffer cache
1332 */
1333 /* ARGSUSED */
1334 static int
nfs_sync(struct mount * mp,int waitfor)1335 nfs_sync(struct mount *mp, int waitfor)
1336 {
1337 struct nfsmount *nmp = VFSTONFS(mp);
1338 struct scaninfo scaninfo;
1339 int error;
1340
1341 scaninfo.rescan = 1;
1342 scaninfo.waitfor = waitfor;
1343 scaninfo.allerror = 0;
1344
1345 /*
1346 * Force stale buffer cache information to be flushed.
1347 */
1348 lwkt_gettoken(&nmp->nm_token);
1349 error = 0;
1350 if ((waitfor & MNT_LAZY) == 0) {
1351 while (error == 0 && scaninfo.rescan) {
1352 scaninfo.rescan = 0;
1353 error = vsyncscan(mp, VMSC_GETVP,
1354 nfs_sync_scan2, &scaninfo);
1355 }
1356 }
1357 lwkt_reltoken(&nmp->nm_token);
1358 return(error);
1359 }
1360
1361 static int
nfs_sync_scan2(struct mount * mp,struct vnode * vp,void * data)1362 nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
1363 {
1364 struct scaninfo *info = data;
1365 int error;
1366
1367 if (vp->v_type == VNON || vp->v_type == VBAD)
1368 return(0);
1369 error = VOP_FSYNC(vp, info->waitfor, 0);
1370 if (error)
1371 info->allerror = error;
1372 return(0);
1373 }
1374
1375