1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All Rights Reserved
28 */
29
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/vfs.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/vnode.h>
37 #include <sys/pathname.h>
38 #include <sys/sysmacros.h>
39 #include <sys/kmem.h>
40 #include <sys/mkdev.h>
41 #include <sys/mount.h>
42 #include <sys/statvfs.h>
43 #include <sys/errno.h>
44 #include <sys/debug.h>
45 #include <sys/cmn_err.h>
46 #include <sys/utsname.h>
47 #include <sys/bootconf.h>
48 #include <sys/modctl.h>
49 #include <sys/acl.h>
50 #include <sys/flock.h>
51 #include <sys/time.h>
52 #include <sys/disp.h>
53 #include <sys/policy.h>
54 #include <sys/socket.h>
55 #include <sys/netconfig.h>
56 #include <sys/dnlc.h>
57 #include <sys/list.h>
58 #include <sys/mntent.h>
59 #include <sys/tsol/label.h>
60
61 #include <rpc/types.h>
62 #include <rpc/auth.h>
63 #include <rpc/rpcsec_gss.h>
64 #include <rpc/clnt.h>
65
66 #include <nfs/nfs.h>
67 #include <nfs/nfs_clnt.h>
68 #include <nfs/mount.h>
69 #include <nfs/nfs_acl.h>
70
71 #include <fs/fs_subr.h>
72
73 #include <nfs/nfs4.h>
74 #include <nfs/rnode4.h>
75 #include <nfs/nfs4_clnt.h>
76 #include <sys/fs/autofs.h>
77
78 #include <sys/sdt.h>
79
80
81 /*
82 * Arguments passed to the thread that frees data structures after a forced unmount.
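 * These are handed to nfs4_free_mount_thread() (declared below).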
83 */
84
85 typedef struct {
86 vfs_t *fm_vfsp;
87 int fm_flag;
88 cred_t *fm_cr;
89 } freemountargs_t;
90
91 static void async_free_mount(vfs_t *, int, cred_t *);
92 static void nfs4_free_mount(vfs_t *, int, cred_t *);
93 static void nfs4_free_mount_thread(freemountargs_t *);
94 static int nfs4_chkdup_servinfo4(servinfo4_t *, servinfo4_t *);
95
96 /*
97 * From rpcsec module (common/rpcsec).
98 */
99 extern int sec_clnt_loadinfo(struct sec_data *, struct sec_data **, model_t);
100 extern void sec_clnt_freeinfo(struct sec_data *);
101
102 /*
103 * The order and contents of this array must be kept in sync with that of
104 * rfsreqcnt_v4_tmpl in nfs_stats.c.
105 */
106 static char *rfsnames_v4[] = {
107 "null", "compound", "reserved", "access", "close", "commit", "create",
108 "delegpurge", "delegreturn", "getattr", "getfh", "link", "lock",
109 "lockt", "locku", "lookup", "lookupp", "nverify", "open", "openattr",
110 "open_confirm", "open_downgrade", "putfh", "putpubfh", "putrootfh",
111 "read", "readdir", "readlink", "remove", "rename", "renew",
112 "restorefh", "savefh", "secinfo", "setattr", "setclientid",
113 "setclientid_confirm", "verify", "write"
114 };
115
116 /*
117 * nfs4_max_mount_retry is the number of times the client will redrive
118 * a mount compound before giving up and returning failure. The intent
119 * is to redrive mount compounds which fail NFS4ERR_STALE so that
120 * if a component of the server path being mounted goes stale, it can
121 * "recover" by redriving the mount compound (LOOKUP ops). This recovery
122 * code is needed outside of the recovery framework because mount is a
123 * special case. The client doesn't create vnodes/rnodes for components
124 * of the server path being mounted. The recovery code recovers real
125 * client objects, not STALE FHs which map to components of the server
126 * path being mounted.
127 *
128 * We could just fail the mount on the first time, but that would
129 * instantly trigger failover (from nfs4_mount), and the client should
130 * try to re-lookup the STALE FH before doing failover. The easiest
131 * way to "re-lookup" is to simply redrive the mount compound.
132 */
133 static int nfs4_max_mount_retry = 2;
134
135 /*
136 * nfs4 vfs operations.
137 */
138 int nfs4_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
139 static int nfs4_unmount(vfs_t *, int, cred_t *);
140 static int nfs4_root(vfs_t *, vnode_t **);
141 static int nfs4_statvfs(vfs_t *, struct statvfs64 *);
142 static int nfs4_sync(vfs_t *, short, cred_t *);
143 static int nfs4_vget(vfs_t *, vnode_t **, fid_t *);
144 static int nfs4_mountroot(vfs_t *, whymountroot_t);
145 static void nfs4_freevfs(vfs_t *);
146
147 static int nfs4rootvp(vnode_t **, vfs_t *, struct servinfo4 *,
148 int, cred_t *, zone_t *);
149
150 vfsops_t *nfs4_vfsops;
151
152 int nfs4_vfsinit(void);
153 void nfs4_vfsfini(void);
154 static void nfs4setclientid_init(void);
155 static void nfs4setclientid_fini(void);
156 static void nfs4setclientid_otw(mntinfo4_t *, servinfo4_t *, cred_t *,
157 struct nfs4_server *, nfs4_error_t *, int *);
158 static void destroy_nfs4_server(nfs4_server_t *);
159 static void remove_mi(nfs4_server_t *, mntinfo4_t *);
160
161 extern void nfs4_ephemeral_init(void);
162 extern void nfs4_ephemeral_fini(void);
163
164 /* referral related routines */
165 static servinfo4_t *copy_svp(servinfo4_t *);
166 static void free_knconf_contents(struct knetconfig *k);
167 static char *extract_referral_point(const char *, int);
168 static void setup_newsvpath(servinfo4_t *, int);
169 static void update_servinfo4(servinfo4_t *, fs_location4 *,
170 struct nfs_fsl_info *, char *, int);
171
172 /*
173 * Initialize the vfs structure
174 */
175
176 static int nfs4fstyp;
177
178
179 /*
180 * Debug variable to check for rdma based
181 * transport startup and cleanup. Controlled
182 * through /etc/system. Off by default.
183 */
184 extern int rdma_debug;
185
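/*
 * File system type initialization for NFSv4: register the vfs, vnode,
 * and trigger-vnode operations templates, then run nfs4_vfsinit() and
 * the dot-entry setup. On failure, any ops that were installed are
 * freed again and the error is returned.
 */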
186 int
187 nfs4init(int fstyp, char *name)
188 {
189 static const fs_operation_def_t nfs4_vfsops_template[] = {
190 VFSNAME_MOUNT, { .vfs_mount = nfs4_mount },
191 VFSNAME_UNMOUNT, { .vfs_unmount = nfs4_unmount },
192 VFSNAME_ROOT, { .vfs_root = nfs4_root },
193 VFSNAME_STATVFS, { .vfs_statvfs = nfs4_statvfs },
194 VFSNAME_SYNC, { .vfs_sync = nfs4_sync },
195 VFSNAME_VGET, { .vfs_vget = nfs4_vget },
196 VFSNAME_MOUNTROOT, { .vfs_mountroot = nfs4_mountroot },
197 VFSNAME_FREEVFS, { .vfs_freevfs = nfs4_freevfs },
198 NULL, NULL
199 };
200 int error;
201
202 nfs4_vfsops = NULL;
203 nfs4_vnodeops = NULL;
204 nfs4_trigger_vnodeops = NULL;
205
206 error = vfs_setfsops(fstyp, nfs4_vfsops_template, &nfs4_vfsops);
207 if (error != 0) {
208 zcmn_err(GLOBAL_ZONEID, CE_WARN,
209 "nfs4init: bad vfs ops template");
210 goto out;
211 }
212
213 error = vn_make_ops(name, nfs4_vnodeops_template, &nfs4_vnodeops);
214 if (error != 0) {
215 zcmn_err(GLOBAL_ZONEID, CE_WARN,
216 "nfs4init: bad vnode ops template");
217 goto out;
218 }
219
220 error = vn_make_ops("nfs4_trigger", nfs4_trigger_vnodeops_template,
221 &nfs4_trigger_vnodeops);
222 if (error != 0) {
223 zcmn_err(GLOBAL_ZONEID, CE_WARN,
224 "nfs4init: bad trigger vnode ops template");
225 goto out;
226 }
227
228 nfs4fstyp = fstyp;
229 (void) nfs4_vfsinit();
230 (void) nfs4_init_dot_entries();
231
232 out:
233 if (error) {
234 if (nfs4_trigger_vnodeops != NULL)
235 vn_freevnodeops(nfs4_trigger_vnodeops);
236
237 if (nfs4_vnodeops != NULL)
238 vn_freevnodeops(nfs4_vnodeops);
239
240 (void) vfs_freevfsops_by_type(fstyp);
241 }
242
243 return (error);
244 }
245
246 void
247 nfs4fini(void)
248 {
249 (void) nfs4_destroy_dot_entries();
250 nfs4_vfsfini();
251 }
252
253 /*
254 * Create a new sec_data structure to store AUTH_DH related data:
255 * netname, syncaddr, knetconfig. The AUTH_F_RPCTIMESYNC flag is not
256 * set for NFS V4 since we avoid contacting the rpcbind daemon and
257 * use the IP time service (IPPORT_TIMESERVER) instead.
258 *
259 * sec_data can be freed by sec_clnt_freeinfo().
260 */
261 static struct sec_data *
262 create_authdh_data(char *netname, int nlen, struct netbuf *syncaddr,
263 struct knetconfig *knconf) {
264 struct sec_data *secdata;
265 dh_k4_clntdata_t *data;
266 char *pf, *p;
267
268 if (syncaddr == NULL || syncaddr->buf == NULL || nlen == 0)
269 return (NULL);
270
271 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
272 secdata->flags = 0;
273
274 data = kmem_alloc(sizeof (*data), KM_SLEEP);
275
276 data->syncaddr.maxlen = syncaddr->maxlen;
277 data->syncaddr.len = syncaddr->len;
278 data->syncaddr.buf = (char *)kmem_alloc(syncaddr->len, KM_SLEEP);
279 bcopy(syncaddr->buf, data->syncaddr.buf, syncaddr->len);
280
281 /*
282 * duplicate the knconf information for the
283 * new opaque data.
284 */
285 data->knconf = kmem_alloc(sizeof (*knconf), KM_SLEEP);
286 *data->knconf = *knconf;
287 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
288 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
289 bcopy(knconf->knc_protofmly, pf, KNC_STRSIZE);
290 bcopy(knconf->knc_proto, p, KNC_STRSIZE);
291 data->knconf->knc_protofmly = pf;
292 data->knconf->knc_proto = p;
293
294 /* move server netname to the sec_data structure */
295 data->netname = kmem_alloc(nlen, KM_SLEEP);
296 bcopy(netname, data->netname, nlen);
297 data->netnamelen = (int)nlen;
298
299 secdata->secmod = AUTH_DH;
300 secdata->rpcflavor = AUTH_DH;
301 secdata->data = (caddr_t)data;
302
303 return (secdata);
304 }
305
306 /*
307 * Returns (deep) copy of sec_data_t. Allocates all memory required; caller
308 * is responsible for freeing.
309 */
310 sec_data_t *
311 copy_sec_data(sec_data_t *fsecdata) {
312 sec_data_t *tsecdata;
313
314 if (fsecdata == NULL)
315 return (NULL);
316
317 if (fsecdata->rpcflavor == AUTH_DH) {
318 dh_k4_clntdata_t *fdata = (dh_k4_clntdata_t *)fsecdata->data;
319
320 if (fdata == NULL)
321 return (NULL);
322
323 tsecdata = (sec_data_t *)create_authdh_data(fdata->netname,
324 fdata->netnamelen, &fdata->syncaddr, fdata->knconf);
325
326 return (tsecdata);
327 }
328
329 tsecdata = kmem_zalloc(sizeof (sec_data_t), KM_SLEEP);
330
331 tsecdata->secmod = fsecdata->secmod;
332 tsecdata->rpcflavor = fsecdata->rpcflavor;
333 tsecdata->flags = fsecdata->flags;
334 tsecdata->uid = fsecdata->uid;
335
336 if (fsecdata->rpcflavor == RPCSEC_GSS) {
337 gss_clntdata_t *gcd = (gss_clntdata_t *)fsecdata->data;
338
339 tsecdata->data = (caddr_t)copy_sec_data_gss(gcd);
340 } else {
341 tsecdata->data = NULL;
342 }
343
344 return (tsecdata);
345 }
346
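/*
 * Return a deep copy of a gss_clntdata_t; all memory is allocated here
 * and the caller is responsible for freeing it.
 */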
347 gss_clntdata_t *
348 copy_sec_data_gss(gss_clntdata_t *fdata)
349 {
350 gss_clntdata_t *tdata;
351
352 if (fdata == NULL)
353 return (NULL);
354
355 tdata = kmem_zalloc(sizeof (gss_clntdata_t), KM_SLEEP);
356
357 tdata->mechanism.length = fdata->mechanism.length;
358 tdata->mechanism.elements = kmem_zalloc(fdata->mechanism.length,
359 KM_SLEEP);
360 bcopy(fdata->mechanism.elements, tdata->mechanism.elements,
361 fdata->mechanism.length);
362
363 tdata->service = fdata->service;
364
365 (void) strcpy(tdata->uname, fdata->uname);
366 (void) strcpy(tdata->inst, fdata->inst);
367 (void) strcpy(tdata->realm, fdata->realm);
368
369 tdata->qop = fdata->qop;
370
371 return (tdata);
372 }
373
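/*
 * Return 1 if svp duplicates an entry already on the svp_head list
 * (same address length, protocol family, and address bytes), 0 otherwise.
 */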
374 static int
375 nfs4_chkdup_servinfo4(servinfo4_t *svp_head, servinfo4_t *svp)
376 {
377 servinfo4_t *si;
378
379 /*
380 * Iterate over the servinfo4 list to make sure
381 * we do not have a duplicate. Skip any servinfo4
382 * that has been marked "NOT IN USE"
383 */
384 for (si = svp_head; si; si = si->sv_next) {
385 (void) nfs_rw_enter_sig(&si->sv_lock, RW_READER, 0);
386 if (si->sv_flags & SV4_NOTINUSE) {
387 nfs_rw_exit(&si->sv_lock);
388 continue;
389 }
390 nfs_rw_exit(&si->sv_lock);
391 if (si == svp)
392 continue;
393 if (si->sv_addr.len == svp->sv_addr.len &&
394 strcmp(si->sv_knconf->knc_protofmly,
395 svp->sv_knconf->knc_protofmly) == 0 &&
396 bcmp(si->sv_addr.buf, svp->sv_addr.buf,
397 si->sv_addr.len) == 0) {
398 /* it's a duplicate */
399 return (1);
400 }
401 }
402 /* it's not a duplicate */
403 return (0);
404 }
405
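/*
 * Free everything that nfs4_copyin() allocated inside an nfs_args
 * structure. Each pointer is reset to NULL after it is freed, so this
 * is safe to call on a partially filled structure.
 */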
406 void
407 nfs4_free_args(struct nfs_args *nargs)
408 {
409 if (nargs->knconf) {
410 if (nargs->knconf->knc_protofmly)
411 kmem_free(nargs->knconf->knc_protofmly,
412 KNC_STRSIZE);
413 if (nargs->knconf->knc_proto)
414 kmem_free(nargs->knconf->knc_proto, KNC_STRSIZE);
415 kmem_free(nargs->knconf, sizeof (*nargs->knconf));
416 nargs->knconf = NULL;
417 }
418
419 if (nargs->fh) {
420 kmem_free(nargs->fh, strlen(nargs->fh) + 1);
421 nargs->fh = NULL;
422 }
423
424 if (nargs->hostname) {
425 kmem_free(nargs->hostname, strlen(nargs->hostname) + 1);
426 nargs->hostname = NULL;
427 }
428
429 if (nargs->addr) {
430 if (nargs->addr->buf) {
431 ASSERT(nargs->addr->len);
432 kmem_free(nargs->addr->buf, nargs->addr->len);
433 }
434 kmem_free(nargs->addr, sizeof (struct netbuf));
435 nargs->addr = NULL;
436 }
437
438 if (nargs->syncaddr) {
439 ASSERT(nargs->syncaddr->len);
440 if (nargs->syncaddr->buf) {
441 ASSERT(nargs->syncaddr->len);
442 kmem_free(nargs->syncaddr->buf, nargs->syncaddr->len);
443 }
444 kmem_free(nargs->syncaddr, sizeof (struct netbuf));
445 nargs->syncaddr = NULL;
446 }
447
448 if (nargs->netname) {
449 kmem_free(nargs->netname, strlen(nargs->netname) + 1);
450 nargs->netname = NULL;
451 }
452
453 if (nargs->nfs_ext_u.nfs_extA.secdata) {
454 sec_clnt_freeinfo(
455 nargs->nfs_ext_u.nfs_extA.secdata);
456 nargs->nfs_ext_u.nfs_extA.secdata = NULL;
457 }
458 }
459
460
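/*
 * Copy the nfs_args mount arguments in from user space into *nargs,
 * honoring the caller's data model (32- or 64-bit). Returns 0 or an
 * errno; on error, any partially copied fields are released via
 * nfs4_free_args().
 */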
461 int
462 nfs4_copyin(char *data, int datalen, struct nfs_args *nargs)
463 {
464
465 int error;
466 size_t hlen; /* length of hostname */
467 size_t nlen; /* length of netname */
468 char netname[MAXNETNAMELEN+1]; /* server's netname */
469 struct netbuf addr; /* server's address */
470 struct netbuf syncaddr; /* AUTH_DES time sync addr */
471 struct knetconfig *knconf; /* transport structure */
472 struct sec_data *secdata = NULL; /* security data */
473 STRUCT_DECL(nfs_args, args); /* nfs mount arguments */
474 STRUCT_DECL(knetconfig, knconf_tmp);
475 STRUCT_DECL(netbuf, addr_tmp);
476 int flags;
477 char *p, *pf;
478 struct pathname pn;
479 char *userbufptr;
480
481
482 bzero(nargs, sizeof (*nargs));
483
484 STRUCT_INIT(args, get_udatamodel());
485 bzero(STRUCT_BUF(args), SIZEOF_STRUCT(nfs_args, DATAMODEL_NATIVE));
486 if (copyin(data, STRUCT_BUF(args), MIN(datalen,
487 STRUCT_SIZE(args))))
488 return (EFAULT);
489
490 nargs->wsize = STRUCT_FGET(args, wsize);
491 nargs->rsize = STRUCT_FGET(args, rsize);
492 nargs->timeo = STRUCT_FGET(args, timeo);
493 nargs->retrans = STRUCT_FGET(args, retrans);
494 nargs->acregmin = STRUCT_FGET(args, acregmin);
495 nargs->acregmax = STRUCT_FGET(args, acregmax);
496 nargs->acdirmin = STRUCT_FGET(args, acdirmin);
497 nargs->acdirmax = STRUCT_FGET(args, acdirmax);
498
499 flags = STRUCT_FGET(args, flags);
500 nargs->flags = flags;
501
502 addr.buf = NULL;
503 syncaddr.buf = NULL;
504
505
506 /*
507 * Allocate space for a knetconfig structure and
508 * its strings and copy in from user-land.
509 */
510 knconf = kmem_zalloc(sizeof (*knconf), KM_SLEEP);
511 STRUCT_INIT(knconf_tmp, get_udatamodel());
512 if (copyin(STRUCT_FGETP(args, knconf), STRUCT_BUF(knconf_tmp),
513 STRUCT_SIZE(knconf_tmp))) {
514 kmem_free(knconf, sizeof (*knconf));
515 return (EFAULT);
516 }
517
518 knconf->knc_semantics = STRUCT_FGET(knconf_tmp, knc_semantics);
519 knconf->knc_protofmly = STRUCT_FGETP(knconf_tmp, knc_protofmly);
520 knconf->knc_proto = STRUCT_FGETP(knconf_tmp, knc_proto);
521 if (get_udatamodel() != DATAMODEL_LP64) {
522 knconf->knc_rdev = expldev(STRUCT_FGET(knconf_tmp, knc_rdev));
523 } else {
524 knconf->knc_rdev = STRUCT_FGET(knconf_tmp, knc_rdev);
525 }
526
527 pf = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
528 p = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
529 error = copyinstr(knconf->knc_protofmly, pf, KNC_STRSIZE, NULL);
530 if (error) {
531 kmem_free(pf, KNC_STRSIZE);
532 kmem_free(p, KNC_STRSIZE);
533 kmem_free(knconf, sizeof (*knconf));
534 return (error);
535 }
536
537 error = copyinstr(knconf->knc_proto, p, KNC_STRSIZE, NULL);
538 if (error) {
539 kmem_free(pf, KNC_STRSIZE);
540 kmem_free(p, KNC_STRSIZE);
541 kmem_free(knconf, sizeof (*knconf));
542 return (error);
543 }
544
545
546 knconf->knc_protofmly = pf;
547 knconf->knc_proto = p;
548
549 nargs->knconf = knconf;
550
551 /*
552 * Get server address
553 */
554 STRUCT_INIT(addr_tmp, get_udatamodel());
555 if (copyin(STRUCT_FGETP(args, addr), STRUCT_BUF(addr_tmp),
556 STRUCT_SIZE(addr_tmp))) {
557 error = EFAULT;
558 goto errout;
559 }
560
561 nargs->addr = kmem_zalloc(sizeof (struct netbuf), KM_SLEEP);
562 userbufptr = STRUCT_FGETP(addr_tmp, buf);
563 addr.len = STRUCT_FGET(addr_tmp, len);
564 addr.buf = kmem_alloc(addr.len, KM_SLEEP);
565 addr.maxlen = addr.len;
566 if (copyin(userbufptr, addr.buf, addr.len)) {
567 kmem_free(addr.buf, addr.len);
568 error = EFAULT;
569 goto errout;
570 }
571 bcopy(&addr, nargs->addr, sizeof (struct netbuf));
572
573 /*
574 * Get the root fhandle
575 */
576 error = pn_get(STRUCT_FGETP(args, fh), UIO_USERSPACE, &pn);
577 if (error)
578 goto errout;
579
580 /* Volatile fh: keep server paths, so use actual-size strings */
581 nargs->fh = kmem_alloc(pn.pn_pathlen + 1, KM_SLEEP);
582 bcopy(pn.pn_path, nargs->fh, pn.pn_pathlen);
583 nargs->fh[pn.pn_pathlen] = '\0';
584 pn_free(&pn);
585
586
587 /*
588 * Get server's hostname
589 */
590 if (flags & NFSMNT_HOSTNAME) {
591 error = copyinstr(STRUCT_FGETP(args, hostname),
592 netname, sizeof (netname), &hlen);
593 if (error)
594 goto errout;
595 nargs->hostname = kmem_zalloc(hlen, KM_SLEEP);
596 (void) strcpy(nargs->hostname, netname);
597
598 } else {
599 nargs->hostname = NULL;
600 }
601
602
603 /*
604 * If there are syncaddr and netname data, load them in. This is
605 * to support data needed for NFSV4 when AUTH_DH is the negotiated
606 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
607 */
608 netname[0] = '\0';
609 if (flags & NFSMNT_SECURE) {
610
611 /* get syncaddr */
612 STRUCT_INIT(addr_tmp, get_udatamodel());
613 if (copyin(STRUCT_FGETP(args, syncaddr), STRUCT_BUF(addr_tmp),
614 STRUCT_SIZE(addr_tmp))) {
615 error = EINVAL;
616 goto errout;
617 }
618 userbufptr = STRUCT_FGETP(addr_tmp, buf);
619 syncaddr.len = STRUCT_FGET(addr_tmp, len);
620 syncaddr.buf = kmem_alloc(syncaddr.len, KM_SLEEP);
621 syncaddr.maxlen = syncaddr.len;
622 if (copyin(userbufptr, syncaddr.buf, syncaddr.len)) {
623 kmem_free(syncaddr.buf, syncaddr.len);
624 error = EFAULT;
625 goto errout;
626 }
627
628 nargs->syncaddr = kmem_alloc(sizeof (struct netbuf), KM_SLEEP);
629 bcopy(&syncaddr, nargs->syncaddr, sizeof (struct netbuf));
630
631 /* get server's netname */
632 if (copyinstr(STRUCT_FGETP(args, netname), netname,
633 sizeof (netname), &nlen)) {
634 error = EFAULT;
635 goto errout;
636 }
637
638 netname[nlen] = '\0';
639 nargs->netname = kmem_zalloc(nlen, KM_SLEEP);
640 (void) strcpy(nargs->netname, netname);
641 }
642
643 /*
644 * Get the extension data which has the security data structure.
645 * This includes data for AUTH_SYS as well.
646 */
647 if (flags & NFSMNT_NEWARGS) {
648 nargs->nfs_args_ext = STRUCT_FGET(args, nfs_args_ext);
649 if (nargs->nfs_args_ext == NFS_ARGS_EXTA ||
650 nargs->nfs_args_ext == NFS_ARGS_EXTB) {
651 /*
652 * Indicating the application is using the new
653 * sec_data structure to pass in the security
654 * data.
655 */
656 if (STRUCT_FGETP(args,
657 nfs_ext_u.nfs_extA.secdata) != NULL) {
658 error = sec_clnt_loadinfo(
659 (struct sec_data *)STRUCT_FGETP(args,
660 nfs_ext_u.nfs_extA.secdata),
661 &secdata, get_udatamodel());
662 }
663 nargs->nfs_ext_u.nfs_extA.secdata = secdata;
664 }
665 }
666
667 if (error)
668 goto errout;
669
670 /*
671 * Failover support:
672 *
673 * We may have a linked list of nfs_args structures,
674 * which means the user is looking for failover. If
675 * the mount is not "read-only", or is "soft", we want
676 * to bail out with EINVAL.
677 */
678 if (nargs->nfs_args_ext == NFS_ARGS_EXTB)
679 nargs->nfs_ext_u.nfs_extB.next =
680 STRUCT_FGETP(args, nfs_ext_u.nfs_extB.next);
681
682 errout:
683 if (error)
684 nfs4_free_args(nargs);
685
686 return (error);
687 }
688
689
690 /*
691 * nfs mount vfsop
692 * Set up mount info record and attach it to vfs struct.
693 */
694 int
695 nfs4_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
696 {
697 char *data = uap->dataptr;
698 int error;
699 vnode_t *rtvp; /* the server's root */
700 mntinfo4_t *mi; /* mount info, pointed at by vfs */
701 struct knetconfig *rdma_knconf; /* rdma transport structure */
702 rnode4_t *rp;
703 struct servinfo4 *svp; /* nfs server info */
704 struct servinfo4 *svp_tail = NULL; /* previous nfs server info */
705 struct servinfo4 *svp_head; /* first nfs server info */
706 struct servinfo4 *svp_2ndlast; /* 2nd last in server info list */
707 struct sec_data *secdata; /* security data */
708 struct nfs_args *args = NULL;
709 int flags, addr_type, removed;
710 zone_t *zone = nfs_zone();
711 nfs4_error_t n4e;
712 zone_t *mntzone = NULL;
713
714 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
715 return (EPERM);
716 if (mvp->v_type != VDIR)
717 return (ENOTDIR);
718
719 /*
720 * get arguments
721 *
722 * nfs_args is now versioned and is extensible, so
723 * uap->datalen might be different from sizeof (args)
724 * in a compatible situation.
725 */
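/*
 * Each pass through this loop copies in and processes one nfs_args
 * structure; a failover mount chains additional structures through
 * nfs_ext_u.nfs_extB.next and jumps back here for each one.
 */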
726 more:
727 if (!(uap->flags & MS_SYSSPACE)) {
728 if (args == NULL)
729 args = kmem_zalloc(sizeof (struct nfs_args), KM_SLEEP);
730 else
731 nfs4_free_args(args);
732 error = nfs4_copyin(data, uap->datalen, args);
733 if (error) {
734 if (args) {
735 kmem_free(args, sizeof (*args));
736 }
737 return (error);
738 }
739 } else {
740 args = (struct nfs_args *)data;
741 }
742
743 flags = args->flags;
744
745 /*
746 * If the request changes the locking type, disallow the remount,
747 * because it's questionable whether we can transfer the
748 * locking state correctly.
749 */
750 if (uap->flags & MS_REMOUNT) {
751 if (!(uap->flags & MS_SYSSPACE)) {
752 nfs4_free_args(args);
753 kmem_free(args, sizeof (*args));
754 }
755 if ((mi = VFTOMI4(vfsp)) != NULL) {
756 uint_t new_mi_llock;
757 uint_t old_mi_llock;
758 new_mi_llock = (flags & NFSMNT_LLOCK) ? 1 : 0;
759 old_mi_llock = (mi->mi_flags & MI4_LLOCK) ? 1 : 0;
760 if (old_mi_llock != new_mi_llock)
761 return (EBUSY);
762 }
763 return (0);
764 }
765
766 /*
767 * For ephemeral mount trigger stub vnodes, we have two problems
768 * to solve: racing threads will likely fail the v_count check, and
769 * we want only one to proceed with the mount.
770 *
771 * For stubs, if the mount has already occurred (via a racing thread),
772 * just return success. If not, skip the v_count check and proceed.
773 * Note that we are already serialised at this point.
774 */
775 mutex_enter(&mvp->v_lock);
776 if (vn_matchops(mvp, nfs4_trigger_vnodeops)) {
777 /* mntpt is a v4 stub vnode */
778 ASSERT(RP_ISSTUB(VTOR4(mvp)));
779 ASSERT(!(uap->flags & MS_OVERLAY));
780 ASSERT(!(mvp->v_flag & VROOT));
781 if (vn_mountedvfs(mvp) != NULL) {
782 /* ephemeral mount has already occurred */
783 ASSERT(uap->flags & MS_SYSSPACE);
784 mutex_exit(&mvp->v_lock);
785 return (0);
786 }
787 } else {
788 /* mntpt is a non-v4 or v4 non-stub vnode */
789 if (!(uap->flags & MS_OVERLAY) &&
790 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
791 mutex_exit(&mvp->v_lock);
792 if (!(uap->flags & MS_SYSSPACE)) {
793 nfs4_free_args(args);
794 kmem_free(args, sizeof (*args));
795 }
796 return (EBUSY);
797 }
798 }
799 mutex_exit(&mvp->v_lock);
800
801 /* make sure things are zeroed for errout: */
802 rtvp = NULL;
803 mi = NULL;
804 secdata = NULL;
805
806 /*
807 * A valid knetconfig structure is required.
808 */
809 if (!(flags & NFSMNT_KNCONF) ||
810 args->knconf == NULL || args->knconf->knc_protofmly == NULL ||
811 args->knconf->knc_proto == NULL ||
812 (strcmp(args->knconf->knc_proto, NC_UDP) == 0)) {
813 if (!(uap->flags & MS_SYSSPACE)) {
814 nfs4_free_args(args);
815 kmem_free(args, sizeof (*args));
816 }
817 return (EINVAL);
818 }
819
820 if ((strlen(args->knconf->knc_protofmly) >= KNC_STRSIZE) ||
821 (strlen(args->knconf->knc_proto) >= KNC_STRSIZE)) {
822 if (!(uap->flags & MS_SYSSPACE)) {
823 nfs4_free_args(args);
824 kmem_free(args, sizeof (*args));
825 }
826 return (EINVAL);
827 }
828
829 /*
830 * Allocate a servinfo4 struct.
831 */
832 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
833 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
834 if (svp_tail) {
835 svp_2ndlast = svp_tail;
836 svp_tail->sv_next = svp;
837 } else {
838 svp_head = svp;
839 svp_2ndlast = svp;
840 }
841
842 svp_tail = svp;
843 svp->sv_knconf = args->knconf;
844 args->knconf = NULL;
845
846 /*
847 * Get server address
848 */
849 if (args->addr == NULL || args->addr->buf == NULL) {
850 error = EINVAL;
851 goto errout;
852 }
853
854 svp->sv_addr.maxlen = args->addr->maxlen;
855 svp->sv_addr.len = args->addr->len;
856 svp->sv_addr.buf = args->addr->buf;
857 args->addr->buf = NULL;
858
859 /*
860 * Get the root fhandle
861 */
862 if (args->fh == NULL || (strlen(args->fh) >= MAXPATHLEN)) {
863 error = EINVAL;
864 goto errout;
865 }
866
867 svp->sv_path = args->fh;
868 svp->sv_pathlen = strlen(args->fh) + 1;
869 args->fh = NULL;
870
871 /*
872 * Get server's hostname
873 */
874 if (flags & NFSMNT_HOSTNAME) {
875 if (args->hostname == NULL || (strlen(args->hostname) >
876 MAXNETNAMELEN)) {
877 error = EINVAL;
878 goto errout;
879 }
880 svp->sv_hostnamelen = strlen(args->hostname) + 1;
881 svp->sv_hostname = args->hostname;
882 args->hostname = NULL;
883 } else {
884 char *p = "unknown-host";
885 svp->sv_hostnamelen = strlen(p) + 1;
886 svp->sv_hostname = kmem_zalloc(svp->sv_hostnamelen, KM_SLEEP);
887 (void) strcpy(svp->sv_hostname, p);
888 }
889
890 /*
891 * RDMA MOUNT SUPPORT FOR NFS v4.
892 * Determine whether it is possible to use RDMA; if so, overload the
893 * knconf with an rdma-specific knconf and free the original knconf.
894 */
895 if ((flags & NFSMNT_TRYRDMA) || (flags & NFSMNT_DORDMA)) {
896 /*
897 * Determine the addr type for RDMA, IPv4 or v6.
898 */
899 if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET) == 0)
900 addr_type = AF_INET;
901 else if (strcmp(svp->sv_knconf->knc_protofmly, NC_INET6) == 0)
902 addr_type = AF_INET6;
903
904 if (rdma_reachable(addr_type, &svp->sv_addr,
905 &rdma_knconf) == 0) {
906 /*
907 * If successful, hijack the original knconf and
908 * replace with the new one, depending on the flags.
909 */
910 svp->sv_origknconf = svp->sv_knconf;
911 svp->sv_knconf = rdma_knconf;
912 } else {
913 if (flags & NFSMNT_TRYRDMA) {
914 #ifdef DEBUG
915 if (rdma_debug)
916 zcmn_err(getzoneid(), CE_WARN,
917 "no RDMA onboard, revert\n");
918 #endif
919 }
920
921 if (flags & NFSMNT_DORDMA) {
922 /*
923 * If proto=rdma is specified and no RDMA
924 * path to this server is available, then
925 * ditch this server.
926 * It is not included in the mountable
927 * server list or the replica list.
928 * Check whether more servers are specified
929 * (failover case); otherwise bail out of the mount.
930 */
931 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
932 args->nfs_ext_u.nfs_extB.next != NULL) {
933 data = (char *)
934 args->nfs_ext_u.nfs_extB.next;
935 if (uap->flags & MS_RDONLY &&
936 !(flags & NFSMNT_SOFT)) {
937 if (svp_head->sv_next == NULL) {
938 svp_tail = NULL;
939 svp_2ndlast = NULL;
940 sv4_free(svp_head);
941 goto more;
942 } else {
943 svp_tail = svp_2ndlast;
944 svp_2ndlast->sv_next =
945 NULL;
946 sv4_free(svp);
947 goto more;
948 }
949 }
950 } else {
951 /*
952 * This is the last server specified
953 * in the nfs_args list passed down
954 * and it's not rdma capable.
955 */
956 if (svp_head->sv_next == NULL) {
957 /*
958 * This is the only server specified.
959 */
960 error = EINVAL;
961 #ifdef DEBUG
962 if (rdma_debug)
963 zcmn_err(getzoneid(),
964 CE_WARN,
965 "No RDMA srv");
966 #endif
967 goto errout;
968 } else {
969 /*
970 * There is a list, since some
971 * servers specified before
972 * this one passed all requirements.
973 */
974 svp_tail = svp_2ndlast;
975 svp_2ndlast->sv_next = NULL;
976 sv4_free(svp);
977 goto proceed;
978 }
979 }
980 }
981 }
982 }
983
984 /*
985 * If there are syncaddr and netname data, load them in. This is
986 * to support data needed for NFSV4 when AUTH_DH is the negotiated
987 * flavor via SECINFO (instead of using the MOUNT protocol as in V3).
988 */
989 if (args->flags & NFSMNT_SECURE) {
990 svp->sv_dhsec = create_authdh_data(args->netname,
991 strlen(args->netname),
992 args->syncaddr, svp->sv_knconf);
993 }
994
995 /*
996 * Get the extension data which has the security data structure.
997 * This includes data for AUTH_SYS as well.
998 */
999 if (flags & NFSMNT_NEWARGS) {
1000 switch (args->nfs_args_ext) {
1001 case NFS_ARGS_EXTA:
1002 case NFS_ARGS_EXTB:
1003 /*
1004 * Indicating the application is using the new
1005 * sec_data structure to pass in the security
1006 * data.
1007 */
1008 secdata = args->nfs_ext_u.nfs_extA.secdata;
1009 if (secdata == NULL) {
1010 error = EINVAL;
1011 } else if (uap->flags & MS_SYSSPACE) {
1012 /*
1013 * Need to validate the flavor here if
1014 * sysspace; userspace was already
1015 * validated in the nfs4_copyin function.
1016 */
1017 switch (secdata->rpcflavor) {
1018 case AUTH_NONE:
1019 case AUTH_UNIX:
1020 case AUTH_LOOPBACK:
1021 case AUTH_DES:
1022 case RPCSEC_GSS:
1023 break;
1024 default:
1025 error = EINVAL;
1026 goto errout;
1027 }
1028 }
1029 args->nfs_ext_u.nfs_extA.secdata = NULL;
1030 break;
1031
1032 default:
1033 error = EINVAL;
1034 break;
1035 }
1036
1037 } else if (flags & NFSMNT_SECURE) {
1038 /*
1039 * NFSMNT_SECURE is deprecated but we keep it
1040 * to support the rogue user-generated application
1041 * that may use this undocumented interface to do
1042 * AUTH_DH security, e.g. our own rexd.
1043 *
1044 * Also note that NFSMNT_SECURE is used for passing
1045 * AUTH_DH info to be used in negotiation.
1046 */
1047 secdata = create_authdh_data(args->netname,
1048 strlen(args->netname), args->syncaddr, svp->sv_knconf);
1049
1050 } else {
1051 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1052 secdata->secmod = secdata->rpcflavor = AUTH_SYS;
1053 secdata->data = NULL;
1054 }
1055
1056 svp->sv_secdata = secdata;
1057
1058 /*
1059 * The user does not explicitly specify a flavor, and a
1060 * user-defined default flavor is passed down.
1061 */
1062 if (flags & NFSMNT_SECDEFAULT) {
1063 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1064 svp->sv_flags |= SV4_TRYSECDEFAULT;
1065 nfs_rw_exit(&svp->sv_lock);
1066 }
1067
1068 /*
1069 * Failover support:
1070 *
1071 * We may have a linked list of nfs_args structures,
1072 * which means the user is looking for failover. If
1073 * the mount is not "read-only", or is "soft", we want
1074 * to bail out with EINVAL.
1075 */
1076 if (args->nfs_args_ext == NFS_ARGS_EXTB &&
1077 args->nfs_ext_u.nfs_extB.next != NULL) {
1078 if (uap->flags & MS_RDONLY && !(flags & NFSMNT_SOFT)) {
1079 data = (char *)args->nfs_ext_u.nfs_extB.next;
1080 goto more;
1081 }
1082 error = EINVAL;
1083 goto errout;
1084 }
1085
1086 /*
1087 * Determine the zone we're being mounted into.
1088 */
1089 zone_hold(mntzone = zone); /* start with this assumption */
1090 if (getzoneid() == GLOBAL_ZONEID) {
1091 zone_rele(mntzone);
1092 mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
1093 ASSERT(mntzone != NULL);
1094 if (mntzone != zone) {
1095 error = EBUSY;
1096 goto errout;
1097 }
1098 }
1099
1100 if (is_system_labeled()) {
1101 error = nfs_mount_label_policy(vfsp, &svp->sv_addr,
1102 svp->sv_knconf, cr);
1103
1104 if (error > 0)
1105 goto errout;
1106
1107 if (error == -1) {
1108 /* change mount to read-only to prevent write-down */
1109 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1110 }
1111 }
1112
1113 /*
1114 * Stop the mount from going any further if the zone is going away.
1115 */
1116 if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
1117 error = EBUSY;
1118 goto errout;
1119 }
1120
1121 /*
1122 * Get root vnode.
1123 */
1124 proceed:
1125 error = nfs4rootvp(&rtvp, vfsp, svp_head, flags, cr, mntzone);
1126 if (error) {
1127 /* if nfs4rootvp failed, it will free svp_head */
1128 svp_head = NULL;
1129 goto errout;
1130 }
1131
1132 mi = VTOMI4(rtvp);
1133
1134 /*
1135 * Send client id to the server, if necessary
1136 */
1137 nfs4_error_zinit(&n4e);
1138 nfs4setclientid(mi, cr, FALSE, &n4e);
1139
1140 error = n4e.error;
1141
1142 if (error)
1143 goto errout;
1144
1145 /*
1146 * Set option fields in the mount info record
1147 */
1148
1149 if (svp_head->sv_next) {
1150 mutex_enter(&mi->mi_lock);
1151 mi->mi_flags |= MI4_LLOCK;
1152 mutex_exit(&mi->mi_lock);
1153 }
1154 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, args);
1155 if (error)
1156 goto errout;
1157
1158 /*
1159 * Time to tie in the mirror mount info at last!
1160 */
1161 if (flags & NFSMNT_EPHEMERAL)
1162 error = nfs4_record_ephemeral_mount(mi, mvp);
1163
1164 errout:
1165 if (error) {
1166 if (rtvp != NULL) {
1167 rp = VTOR4(rtvp);
1168 if (rp->r_flags & R4HASHED)
1169 rp4_rmhash(rp);
1170 }
1171 if (mi != NULL) {
1172 nfs4_async_stop(vfsp);
1173 nfs4_async_manager_stop(vfsp);
1174 nfs4_remove_mi_from_server(mi, NULL);
1175 if (rtvp != NULL)
1176 VN_RELE(rtvp);
1177 if (mntzone != NULL)
1178 zone_rele(mntzone);
1179 /* need to remove it from the zone */
1180 removed = nfs4_mi_zonelist_remove(mi);
1181 if (removed)
1182 zone_rele_ref(&mi->mi_zone_ref,
1183 ZONE_REF_NFSV4);
1184 MI4_RELE(mi);
1185 if (!(uap->flags & MS_SYSSPACE) && args) {
1186 nfs4_free_args(args);
1187 kmem_free(args, sizeof (*args));
1188 }
1189 return (error);
1190 }
1191 if (svp_head)
1192 sv4_free(svp_head);
1193 }
1194
1195 if (!(uap->flags & MS_SYSSPACE) && args) {
1196 nfs4_free_args(args);
1197 kmem_free(args, sizeof (*args));
1198 }
1199 if (rtvp != NULL)
1200 VN_RELE(rtvp);
1201
1202 if (mntzone != NULL)
1203 zone_rele(mntzone);
1204
1205 return (error);
1206 }
1207
1208 #ifdef DEBUG
1209 #define VERS_MSG "NFS4 server "
1210 #else
1211 #define VERS_MSG "NFS server "
1212 #endif
1213
1214 #define READ_MSG \
1215 VERS_MSG "%s returned 0 for read transfer size"
1216 #define WRITE_MSG \
1217 VERS_MSG "%s returned 0 for write transfer size"
1218 #define SIZE_MSG \
1219 VERS_MSG "%s returned 0 for maximum file size"
1220
1221 /*
1222 * Get the symbolic link text from the server for a given filehandle
1223 * of that symlink.
1224 *
1225 * (get symlink text) PUTFH READLINK
1226 */
1227 static int
1228 getlinktext_otw(mntinfo4_t *mi, nfs_fh4 *fh, char **linktextp, cred_t *cr,
1229 int flags)
1230 {
1231 COMPOUND4args_clnt args;
1232 COMPOUND4res_clnt res;
1233 int doqueue;
1234 nfs_argop4 argop[2];
1235 nfs_resop4 *resop;
1236 READLINK4res *lr_res;
1237 uint_t len;
1238 bool_t needrecov = FALSE;
1239 nfs4_recov_state_t recov_state;
1240 nfs4_sharedfh_t *sfh;
1241 nfs4_error_t e;
1242 int num_retry = nfs4_max_mount_retry;
1243 int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1244
1245 sfh = sfh4_get(fh, mi);
1246 recov_state.rs_flags = 0;
1247 recov_state.rs_num_retry_despite_err = 0;
1248
1249 recov_retry:
1250 nfs4_error_zinit(&e);
1251
1252 args.array_len = 2;
1253 args.array = argop;
1254 args.ctag = TAG_GET_SYMLINK;
1255
1256 if (! recovery) {
1257 e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
1258 if (e.error) {
1259 sfh4_rele(&sfh);
1260 return (e.error);
1261 }
1262 }
1263
1264 /* 0. putfh symlink fh */
1265 argop[0].argop = OP_CPUTFH;
1266 argop[0].nfs_argop4_u.opcputfh.sfh = sfh;
1267
1268 /* 1. readlink */
1269 argop[1].argop = OP_READLINK;
1270
1271 doqueue = 1;
1272
1273 rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
1274
1275 needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);
1276
1277 if (needrecov && !recovery && num_retry-- > 0) {
1278
1279 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1280 "getlinktext_otw: initiating recovery\n"));
1281
1282 if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
1283 OP_READLINK, NULL, NULL, NULL) == FALSE) {
1284 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1285 if (!e.error)
1286 (void) xdr_free(xdr_COMPOUND4res_clnt,
1287 (caddr_t)&res);
1288 goto recov_retry;
1289 }
1290 }
1291
1292 /*
1293 * If this is a non-NFS4 protocol error and/or we weren't able to recover.
1294 */
1295 if (e.error != 0) {
1296 if (! recovery)
1297 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1298 sfh4_rele(&sfh);
1299 return (e.error);
1300 }
1301
1302 if (res.status) {
1303 e.error = geterrno4(res.status);
1304 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1305 if (! recovery)
1306 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1307 sfh4_rele(&sfh);
1308 return (e.error);
1309 }
1310
1311 /* res.status == NFS4_OK */
1312 ASSERT(res.status == NFS4_OK);
1313
1314 resop = &res.array[1]; /* readlink res */
1315 lr_res = &resop->nfs_resop4_u.opreadlink;
1316
1317 /* treat symlink name as data */
1318 *linktextp = utf8_to_str(&lr_res->link, &len, NULL);
1319
1320 if (! recovery)
1321 nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
1322 sfh4_rele(&sfh);
1323 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1324 return (0);
1325 }
1326
1327 /*
1328 * Skip over consecutive slashes and "/./" in a pathname.
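 * For example, "///./export/home" is advanced to "export/home".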
1329 */
1330 void
1331 pathname_skipslashdot(struct pathname *pnp)
1332 {
1333 char *c1, *c2;
1334
1335 while (pnp->pn_pathlen > 0 && *pnp->pn_path == '/') {
1336
1337 c1 = pnp->pn_path + 1;
1338 c2 = pnp->pn_path + 2;
1339
1340 if (*c1 == '.' && (*c2 == '/' || *c2 == '\0')) {
1341 pnp->pn_path = pnp->pn_path + 2; /* skip "/." */
1342 pnp->pn_pathlen = pnp->pn_pathlen - 2;
1343 } else {
1344 pnp->pn_path++;
1345 pnp->pn_pathlen--;
1346 }
1347 }
1348 }
1349
1350 /*
1351 * Resolve a symbolic link path. The symlink is in the nth component of
1352 * svp->sv_path and has an nfs4 file handle "fh".
1353 * Upon return, the sv_path will point to the new path that has the nth
1354 * component resolved to its symlink text.
1355 */
1356 int
1357 resolve_sympath(mntinfo4_t *mi, servinfo4_t *svp, int nth, nfs_fh4 *fh,
1358 cred_t *cr, int flags)
1359 {
1360 char *oldpath;
1361 char *symlink, *newpath;
1362 struct pathname oldpn, newpn;
1363 char component[MAXNAMELEN];
1364 int i, addlen, error = 0;
1365 int oldpathlen;
1366
1367 /* Get the symbolic link text over the wire. */
1368 error = getlinktext_otw(mi, fh, &symlink, cr, flags);
1369
1370 if (error || symlink == NULL || strlen(symlink) == 0)
1371 return (error);
1372
1373 /*
1374 * Compose the new pathname.
1375 * Note:
1376 * - only the nth component is resolved for the pathname.
1377 * - pathname.pn_pathlen does not count the ending null byte.
1378 */
1379 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1380 oldpath = svp->sv_path;
1381 oldpathlen = svp->sv_pathlen;
1382 if (error = pn_get(oldpath, UIO_SYSSPACE, &oldpn)) {
1383 nfs_rw_exit(&svp->sv_lock);
1384 kmem_free(symlink, strlen(symlink) + 1);
1385 return (error);
1386 }
1387 nfs_rw_exit(&svp->sv_lock);
1388 pn_alloc(&newpn);
1389
1390 /*
1391 * Skip over previous components from the oldpath so that the
1392 * oldpn.pn_path will point to the symlink component. Skip
1393 * leading slashes and "/./" (no OP_LOOKUP on ".") so that
1394 * pn_getcomponent can get the component.
1395 */
1396 for (i = 1; i < nth; i++) {
1397 pathname_skipslashdot(&oldpn);
1398 error = pn_getcomponent(&oldpn, component);
1399 if (error)
1400 goto out;
1401 }
1402
1403 /*
1404 * Copy the old path up to the component right before the symlink
1405 * if the symlink is not an absolute path.
1406 */
1407 if (symlink[0] != '/') {
1408 addlen = oldpn.pn_path - oldpn.pn_buf;
1409 bcopy(oldpn.pn_buf, newpn.pn_path, addlen);
1410 newpn.pn_pathlen += addlen;
1411 newpn.pn_path += addlen;
1412 newpn.pn_buf[newpn.pn_pathlen] = '/';
1413 newpn.pn_pathlen++;
1414 newpn.pn_path++;
1415 }
1416
1417 /* copy the resolved symbolic link text */
1418 addlen = strlen(symlink);
1419 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1420 error = ENAMETOOLONG;
1421 goto out;
1422 }
1423 bcopy(symlink, newpn.pn_path, addlen);
1424 newpn.pn_pathlen += addlen;
1425 newpn.pn_path += addlen;
1426
1427 /*
1428 * Check if there is any remaining path after the symlink component.
1429 * First, skip the symlink component.
1430 */
1431 pathname_skipslashdot(&oldpn);
1432 if (error = pn_getcomponent(&oldpn, component))
1433 goto out;
1434
1435 addlen = pn_pathleft(&oldpn); /* includes counting the slash */
1436
1437 /*
1438 * Copy the remaining path to the new pathname if there is any.
1439 */
1440 if (addlen > 0) {
1441 if (newpn.pn_pathlen + addlen >= newpn.pn_bufsize) {
1442 error = ENAMETOOLONG;
1443 goto out;
1444 }
1445 bcopy(oldpn.pn_path, newpn.pn_path, addlen);
1446 newpn.pn_pathlen += addlen;
1447 }
1448 newpn.pn_buf[newpn.pn_pathlen] = '\0';
1449
1450 /* get the newpath and store it in the servinfo4_t */
1451 newpath = kmem_alloc(newpn.pn_pathlen + 1, KM_SLEEP);
1452 bcopy(newpn.pn_buf, newpath, newpn.pn_pathlen);
1453 newpath[newpn.pn_pathlen] = '\0';
1454
1455 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1456 svp->sv_path = newpath;
1457 svp->sv_pathlen = strlen(newpath) + 1;
1458 nfs_rw_exit(&svp->sv_lock);
1459
1460 kmem_free(oldpath, oldpathlen);
1461 out:
1462 kmem_free(symlink, strlen(symlink) + 1);
1463 pn_free(&newpn);
1464 pn_free(&oldpn);
1465
1466 return (error);
1467 }
1468
1469 /*
1470 * This routine updates the servinfo4 structure with the new referred
1471 * server info.
1472 * nfsfsloc has the location related information
1473 * fsp has the hostname and pathname info.
1474 * new path = pathname from referral + part of orig pathname(based on nth).
1475 */
1476 static void
1477 update_servinfo4(servinfo4_t *svp, fs_location4 *fsp,
1478 struct nfs_fsl_info *nfsfsloc, char *orig_path, int nth)
1479 {
1480 struct knetconfig *knconf, *svknconf;
1481 struct netbuf *saddr;
1482 sec_data_t *secdata;
1483 utf8string *host;
1484 int i = 0, num_slashes = 0;
1485 char *p, *spath, *op, *new_path;
1486
1487 /* Update knconf */
1488 knconf = svp->sv_knconf;
1489 free_knconf_contents(knconf);
1490 bzero(knconf, sizeof (struct knetconfig));
1491 svknconf = nfsfsloc->knconf;
1492 knconf->knc_semantics = svknconf->knc_semantics;
1493 knconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1494 knconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
1495 knconf->knc_rdev = svknconf->knc_rdev;
1496 bcopy(svknconf->knc_protofmly, knconf->knc_protofmly, KNC_STRSIZE);
1497 bcopy(svknconf->knc_proto, knconf->knc_proto, KNC_STRSIZE);
1498
1499 /* Update server address */
1500 saddr = &svp->sv_addr;
1501 if (saddr->buf != NULL)
1502 kmem_free(saddr->buf, saddr->maxlen);
1503 saddr->buf = kmem_alloc(nfsfsloc->addr->maxlen, KM_SLEEP);
1504 saddr->len = nfsfsloc->addr->len;
1505 saddr->maxlen = nfsfsloc->addr->maxlen;
1506 bcopy(nfsfsloc->addr->buf, saddr->buf, nfsfsloc->addr->len);
1507
1508 /* Update server name */
1509 host = fsp->server_val;
1510 kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
1511 svp->sv_hostname = kmem_zalloc(host->utf8string_len + 1, KM_SLEEP);
1512 bcopy(host->utf8string_val, svp->sv_hostname, host->utf8string_len);
1513 svp->sv_hostname[host->utf8string_len] = '\0';
1514 svp->sv_hostnamelen = host->utf8string_len + 1;
1515
1516 /*
1517 * Update server path.
1518 * We need to set up the proper path here.
1519 * For example, if we got a path name serv1:/rp/aaa/bbb
1520 * where aaa is a referral and points to serv2:/rpool/aa,
1521 * we need to set the path to serv2:/rpool/aa/bbb.
1522 * The first part of the code below generates /rpool/aa
1523 * and the second part appends /bbb to the server path.
1524 */
1525 spath = p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1526 *p++ = '/';
1527 for (i = 0; i < fsp->rootpath.pathname4_len; i++) {
1528 component4 *comp;
1529
1530 comp = &fsp->rootpath.pathname4_val[i];
1531 /* If no space, null the string and bail */
1532 if ((p - spath) + comp->utf8string_len + 1 > MAXPATHLEN) {
1533 p = spath + MAXPATHLEN - 1;
1534 spath[0] = '\0';
1535 break;
1536 }
1537 bcopy(comp->utf8string_val, p, comp->utf8string_len);
1538 p += comp->utf8string_len;
1539 *p++ = '/';
1540 }
1541 if (fsp->rootpath.pathname4_len != 0)
1542 *(p - 1) = '\0';
1543 else
1544 *p = '\0';
1545 p = spath;
1546
1547 new_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1548 (void) strlcpy(new_path, p, MAXPATHLEN);
1549 kmem_free(p, MAXPATHLEN);
1550 i = strlen(new_path);
1551
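/*
 * Append whatever followed the referral component in the original
 * path.  The referral is the (nth+1)th component, so the remainder
 * begins at the (nth+2)th slash of orig_path.
 */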
1552 for (op = orig_path; *op; op++) {
1553 if (*op == '/')
1554 num_slashes++;
1555 if (num_slashes == nth + 2) {
1556 while (*op != '\0') {
1557 new_path[i] = *op;
1558 i++;
1559 op++;
1560 }
1561 break;
1562 }
1563 }
1564 new_path[i] = '\0';
1565
1566 kmem_free(svp->sv_path, svp->sv_pathlen);
1567 svp->sv_pathlen = strlen(new_path) + 1;
1568 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
1569 bcopy(new_path, svp->sv_path, svp->sv_pathlen);
1570 kmem_free(new_path, MAXPATHLEN);
1571
1572 /*
1573 * All the security data is specific to the old server.
1574 * Clean it up except secdata which deals with mount options.
1575 * We need to inherit that data. Copy secdata into our new servinfo4.
1576 */
1577 if (svp->sv_dhsec) {
1578 sec_clnt_freeinfo(svp->sv_dhsec);
1579 svp->sv_dhsec = NULL;
1580 }
1581 if (svp->sv_save_secinfo &&
1582 svp->sv_save_secinfo != svp->sv_secinfo) {
1583 secinfo_free(svp->sv_save_secinfo);
1584 svp->sv_save_secinfo = NULL;
1585 }
1586 if (svp->sv_secinfo) {
1587 secinfo_free(svp->sv_secinfo);
1588 svp->sv_secinfo = NULL;
1589 }
1590 svp->sv_currsec = NULL;
1591
1592 secdata = kmem_alloc(sizeof (*secdata), KM_SLEEP);
1593 *secdata = *svp->sv_secdata;
1594 secdata->data = NULL;
1595 if (svp->sv_secdata) {
1596 sec_clnt_freeinfo(svp->sv_secdata);
1597 svp->sv_secdata = NULL;
1598 }
1599 svp->sv_secdata = secdata;
1600 }
1601
1602 /*
1603 * Resolve a referral. The referral is in the n+1th component of
1604 * svp->sv_path and has a parent nfs4 file handle "fh".
1605 * Upon return, the sv_path will point to the new path that has referral
1606 * component resolved to its referred path and part of original path.
1607 * Hostname and other address information is also updated.
1608 */
1609 int
1610 resolve_referral(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, int nth,
1611 nfs_fh4 *fh)
1612 {
1613 nfs4_sharedfh_t *sfh;
1614 struct nfs_fsl_info nfsfsloc;
1615 nfs4_ga_res_t garp;
1616 COMPOUND4res_clnt callres;
1617 fs_location4 *fsp;
1618 char *nm, *orig_path;
1619 int orig_pathlen = 0, ret = -1, index;
1620
1621 if (svp->sv_pathlen <= 0)
1622 return (ret);
1623
1624 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1625 orig_pathlen = svp->sv_pathlen;
1626 orig_path = kmem_alloc(orig_pathlen, KM_SLEEP);
1627 bcopy(svp->sv_path, orig_path, orig_pathlen);
1628 nm = extract_referral_point(svp->sv_path, nth);
1629 setup_newsvpath(svp, nth);
1630 nfs_rw_exit(&svp->sv_lock);
1631
1632 sfh = sfh4_get(fh, mi);
1633 index = nfs4_process_referral(mi, sfh, nm, cr,
1634 &garp, &callres, &nfsfsloc);
1635 sfh4_rele(&sfh);
1636 kmem_free(nm, MAXPATHLEN);
1637 if (index < 0) {
1638 kmem_free(orig_path, orig_pathlen);
1639 return (index);
1640 }
1641
1642 fsp = &garp.n4g_ext_res->n4g_fslocations.locations_val[index];
1643 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
1644 update_servinfo4(svp, fsp, &nfsfsloc, orig_path, nth);
1645 nfs_rw_exit(&svp->sv_lock);
1646
1647 mutex_enter(&mi->mi_lock);
1648 mi->mi_vfs_referral_loop_cnt++;
1649 mutex_exit(&mi->mi_lock);
1650
1651 ret = 0;
1652 bad:
1653 /* Free up XDR memory allocated in nfs4_process_referral() */
1654 xdr_free(xdr_nfs_fsl_info, (char *)&nfsfsloc);
1655 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&callres);
1656 kmem_free(orig_path, orig_pathlen);
1657
1658 return (ret);
1659 }
1660
1661 /*
1662 * Get the root filehandle for the given filesystem and server, and update
1663 * svp.
1664 *
1665 * If NFS4_GETFH_NEEDSOP is set, then use nfs4_start_fop and nfs4_end_fop
1666 * to coordinate with recovery. Otherwise, the caller is assumed to be
1667 * the recovery thread or have already done a start_fop.
1668 *
1669 * Errors are returned by the nfs4_error_t parameter.
1670 */
1671 static void
1672 nfs4getfh_otw(struct mntinfo4 *mi, servinfo4_t *svp, vtype_t *vtp,
1673 int flags, cred_t *cr, nfs4_error_t *ep)
1674 {
1675 COMPOUND4args_clnt args;
1676 COMPOUND4res_clnt res;
1677 int doqueue = 1;
1678 nfs_argop4 *argop;
1679 nfs_resop4 *resop;
1680 nfs4_ga_res_t *garp;
1681 int num_argops;
1682 lookup4_param_t lookuparg;
1683 nfs_fh4 *tmpfhp;
1684 nfs_fh4 *resfhp;
1685 bool_t needrecov = FALSE;
1686 nfs4_recov_state_t recov_state;
1687 int llndx;
1688 int nthcomp;
1689 int recovery = !(flags & NFS4_GETFH_NEEDSOP);
1690
1691 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1692 ASSERT(svp->sv_path != NULL);
1693 if (svp->sv_path[0] == '\0') {
1694 nfs_rw_exit(&svp->sv_lock);
1695 nfs4_error_init(ep, EINVAL);
1696 return;
1697 }
1698 nfs_rw_exit(&svp->sv_lock);
1699
1700 recov_state.rs_flags = 0;
1701 recov_state.rs_num_retry_despite_err = 0;
1702
1703 recov_retry:
1704 if (mi->mi_vfs_referral_loop_cnt >= NFS4_REFERRAL_LOOP_MAX) {
1705 DTRACE_PROBE3(nfs4clnt__debug__referral__loop, mntinfo4 *,
1706 mi, servinfo4_t *, svp, char *, "nfs4getfh_otw");
1707 nfs4_error_init(ep, EINVAL);
1708 return;
1709 }
1710 nfs4_error_zinit(ep);
1711
1712 if (!recovery) {
1713 ep->error = nfs4_start_fop(mi, NULL, NULL, OH_MOUNT,
1714 &recov_state, NULL);
1715
1716 /*
1717 * If recovery has been started and this request was
1718 * initiated by a mount, then we must wait for recovery
1719 * to finish before proceeding, otherwise, the error
1720 * cleanup would remove data structures needed by the
1721 * recovery thread.
1722 */
1723 if (ep->error) {
1724 mutex_enter(&mi->mi_lock);
1725 if (mi->mi_flags & MI4_MOUNTING) {
1726 mi->mi_flags |= MI4_RECOV_FAIL;
1727 mi->mi_error = EIO;
1728
1729 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1730 "nfs4getfh_otw: waiting 4 recovery\n"));
1731
1732 while (mi->mi_flags & MI4_RECOV_ACTIV)
1733 cv_wait(&mi->mi_failover_cv,
1734 &mi->mi_lock);
1735 }
1736 mutex_exit(&mi->mi_lock);
1737 return;
1738 }
1739
1740 /*
1741 * If the client does not specify a specific flavor to use
1742 * and has not gotten a secinfo list from the server yet,
1743 * retrieve the secinfo list from the server and use a
1744 * flavor from the list to mount.
1745 *
1746 * If we fail to get the secinfo list from the server, then
1747 * try the default flavor.
1748 */
1749 if ((svp->sv_flags & SV4_TRYSECDEFAULT) &&
1750 svp->sv_secinfo == NULL) {
1751 (void) nfs4_secinfo_path(mi, cr, FALSE);
1752 }
1753 }
1754
1755 if (recovery)
1756 args.ctag = TAG_REMAP_MOUNT;
1757 else
1758 args.ctag = TAG_MOUNT;
1759
1760 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1761 lookuparg.argsp = &args;
1762 lookuparg.resp = &res;
1763 lookuparg.header_len = 2; /* Putrootfh, getfh */
1764 lookuparg.trailer_len = 0;
1765 lookuparg.ga_bits = FATTR4_FSINFO_MASK;
1766 lookuparg.mi = mi;
1767
1768 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
1769 ASSERT(svp->sv_path != NULL);
1770 llndx = nfs4lookup_setup(svp->sv_path, &lookuparg, 0);
1771 nfs_rw_exit(&svp->sv_lock);
1772
1773 argop = args.array;
1774 num_argops = args.array_len;
1775
1776 /* choose public or root filehandle */
1777 if (flags & NFS4_GETFH_PUBLIC)
1778 argop[0].argop = OP_PUTPUBFH;
1779 else
1780 argop[0].argop = OP_PUTROOTFH;
1781
1782 /* get fh */
1783 argop[1].argop = OP_GETFH;
1784
1785 NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
1786 "nfs4getfh_otw: %s call, mi 0x%p",
1787 needrecov ? "recov" : "first", (void *)mi));
1788
1789 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1790
1791 needrecov = nfs4_needs_recovery(ep, FALSE, mi->mi_vfsp);
1792
1793 if (needrecov) {
1794 bool_t abort;
1795
1796 if (recovery) {
1797 nfs4args_lookup_free(argop, num_argops);
1798 kmem_free(argop,
1799 lookuparg.arglen * sizeof (nfs_argop4));
1800 if (!ep->error)
1801 (void) xdr_free(xdr_COMPOUND4res_clnt,
1802 (caddr_t)&res);
1803 return;
1804 }
1805
1806 NFS4_DEBUG(nfs4_client_recov_debug,
1807 (CE_NOTE, "nfs4getfh_otw: initiating recovery\n"));
1808
1809 abort = nfs4_start_recovery(ep, mi, NULL,
1810 NULL, NULL, NULL, OP_GETFH, NULL, NULL, NULL);
1811 if (!ep->error) {
1812 ep->error = geterrno4(res.status);
1813 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1814 }
1815 nfs4args_lookup_free(argop, num_argops);
1816 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1817 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
1818 /* have another go? */
1819 if (abort == FALSE)
1820 goto recov_retry;
1821 return;
1822 }
1823
1824 /*
1825 * No recovery, but check if error is set.
1826 */
1827 if (ep->error) {
1828 nfs4args_lookup_free(argop, num_argops);
1829 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1830 if (!recovery)
1831 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1832 needrecov);
1833 return;
1834 }
1835
1836 is_link_err:
1837
1838 /* for non-recovery errors */
1839 if (res.status && res.status != NFS4ERR_SYMLINK &&
1840 res.status != NFS4ERR_MOVED) {
1841 if (!recovery) {
1842 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1843 needrecov);
1844 }
1845 nfs4args_lookup_free(argop, num_argops);
1846 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1847 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1848 return;
1849 }
1850
1851 /*
1852 * If any intermediate component in the path is a symbolic link,
1853 * resolve the symlink, then try mount again using the new path.
1854 */
1855 if (res.status == NFS4ERR_SYMLINK || res.status == NFS4ERR_MOVED) {
1856 int where;
1857
1858 /*
1859 * Need to call nfs4_end_op before resolve_sympath to avoid
1860 * potential nfs4_start_op deadlock.
1861 */
1862 if (!recovery)
1863 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1864 needrecov);
1865
1866 /*
1867 * This must be from OP_LOOKUP failure. The (cfh) for this
1868 * OP_LOOKUP is a symlink node. Find out where the
1869 * OP_GETFH is for the (cfh) that is a symlink node.
1870 *
1871 * Example:
1872 * (mount) PUTROOTFH, GETFH, LOOKUP comp1, GETFH, GETATTR,
1873 * LOOKUP comp2, GETFH, GETATTR, LOOKUP comp3, GETFH, GETATTR
1874 *
1875 * LOOKUP comp3 fails with SYMLINK because comp2 is a symlink.
1876 * In this case, where = 7, nthcomp = 2.
1877 */
1878 where = res.array_len - 2;
1879 ASSERT(where > 0);
1880
1881 if (res.status == NFS4ERR_SYMLINK) {
1882
1883 resop = &res.array[where - 1];
1884 ASSERT(resop->resop == OP_GETFH);
1885 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1886 nthcomp = res.array_len/3 - 1;
1887 ep->error = resolve_sympath(mi, svp, nthcomp,
1888 tmpfhp, cr, flags);
1889
1890 } else if (res.status == NFS4ERR_MOVED) {
1891
1892 resop = &res.array[where - 2];
1893 ASSERT(resop->resop == OP_GETFH);
1894 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1895 nthcomp = res.array_len/3 - 1;
1896 ep->error = resolve_referral(mi, svp, cr, nthcomp,
1897 tmpfhp);
1898 }
1899
1900 nfs4args_lookup_free(argop, num_argops);
1901 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1902 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1903
1904 if (ep->error)
1905 return;
1906
1907 goto recov_retry;
1908 }
1909
1910 /* getfh */
1911 resop = &res.array[res.array_len - 2];
1912 ASSERT(resop->resop == OP_GETFH);
1913 resfhp = &resop->nfs_resop4_u.opgetfh.object;
1914
1915 /* getattr fsinfo res */
1916 resop++;
1917 garp = &resop->nfs_resop4_u.opgetattr.ga_res;
1918
1919 *vtp = garp->n4g_va.va_type;
1920
1921 mi->mi_fh_expire_type = garp->n4g_ext_res->n4g_fet;
1922
1923 mutex_enter(&mi->mi_lock);
1924 if (garp->n4g_ext_res->n4g_pc4.pc4_link_support)
1925 mi->mi_flags |= MI4_LINK;
1926 if (garp->n4g_ext_res->n4g_pc4.pc4_symlink_support)
1927 mi->mi_flags |= MI4_SYMLINK;
1928 if (garp->n4g_ext_res->n4g_suppattrs & FATTR4_ACL_MASK)
1929 mi->mi_flags |= MI4_ACL;
1930 mutex_exit(&mi->mi_lock);
1931
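/*
 * Clamp the client's transfer sizes to the server's advertised
 * maxread/maxwrite.  A value of 0 means the server reported no
 * limit, in which case MAXBSIZE is used as the cap.
 */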
1932 if (garp->n4g_ext_res->n4g_maxread == 0)
1933 mi->mi_tsize =
1934 MIN(MAXBSIZE, mi->mi_tsize);
1935 else
1936 mi->mi_tsize =
1937 MIN(garp->n4g_ext_res->n4g_maxread,
1938 mi->mi_tsize);
1939
1940 if (garp->n4g_ext_res->n4g_maxwrite == 0)
1941 mi->mi_stsize =
1942 MIN(MAXBSIZE, mi->mi_stsize);
1943 else
1944 mi->mi_stsize =
1945 MIN(garp->n4g_ext_res->n4g_maxwrite,
1946 mi->mi_stsize);
1947
1948 if (garp->n4g_ext_res->n4g_maxfilesize != 0)
1949 mi->mi_maxfilesize =
1950 MIN(garp->n4g_ext_res->n4g_maxfilesize,
1951 mi->mi_maxfilesize);
1952
1953 /*
1954 * If the final component is a symbolic link, resolve the symlink,
1955 * then try mount again using the new path.
1956 *
1957 * Assume no symbolic link for the root filesystem "/".
1958 */
1959 if (*vtp == VLNK) {
1960 /*
1961 * nthcomp is the total result length minus
1962 * the 1st 2 OPs (PUTROOTFH, GETFH),
1963 * then divided by 3 (LOOKUP,GETFH,GETATTR)
1964 *
1965 * e.g. PUTROOTFH GETFH LOOKUP 1st-comp GETFH GETATTR
1966 * LOOKUP 2nd-comp GETFH GETATTR
1967 *
1968 * (8 - 2)/3 = 2
1969 */
1970 nthcomp = (res.array_len - 2)/3;
1971
1972 /*
1973 * Need to call nfs4_end_op before resolve_sympath to avoid
1974 * potential nfs4_start_op deadlock. See RFE 4777612.
1975 */
1976 if (!recovery)
1977 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state,
1978 needrecov);
1979
1980 ep->error = resolve_sympath(mi, svp, nthcomp, resfhp, cr,
1981 flags);
1982
1983 nfs4args_lookup_free(argop, num_argops);
1984 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1985 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1986
1987 if (ep->error)
1988 return;
1989
1990 goto recov_retry;
1991 }
1992
1993 /*
1994 * We need to figure out where in the compound the getfh
1995 * for the parent directory is. If the object to be mounted is
1996 * the root, then there is no lookup at all:
1997 * PUTROOTFH, GETFH.
1998 * If the object to be mounted is in the root, then the compound is:
1999 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR.
2000 * In either of these cases, the index of the GETFH is 1.
2001 * If it is not at the root, then it's something like:
2002 * PUTROOTFH, GETFH, LOOKUP, GETFH, GETATTR,
2003 * LOOKUP, GETFH, GETATTR
2004 * In this case, the index is llndx (last lookup index) - 2.
2005 */
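/*
 * For example (a hypothetical two-component path a/b):
 *   0:PUTROOTFH 1:GETFH 2:LOOKUP a 3:GETFH 4:GETATTR
 *   5:LOOKUP b 6:GETFH 7:GETATTR
 * The last lookup index llndx is 5, so the parent directory's
 * GETFH is at llndx - 2 = 3.
 */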
2006 if (llndx == -1 || llndx == 2)
2007 resop = &res.array[1];
2008 else {
2009 ASSERT(llndx > 2);
2010 resop = &res.array[llndx-2];
2011 }
2012
2013 ASSERT(resop->resop == OP_GETFH);
2014 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
2015
2016 /* save the filehandles for the replica */
2017 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2018 ASSERT(tmpfhp->nfs_fh4_len <= NFS4_FHSIZE);
2019 svp->sv_pfhandle.fh_len = tmpfhp->nfs_fh4_len;
2020 bcopy(tmpfhp->nfs_fh4_val, svp->sv_pfhandle.fh_buf,
2021 tmpfhp->nfs_fh4_len);
2022 ASSERT(resfhp->nfs_fh4_len <= NFS4_FHSIZE);
2023 svp->sv_fhandle.fh_len = resfhp->nfs_fh4_len;
2024 bcopy(resfhp->nfs_fh4_val, svp->sv_fhandle.fh_buf, resfhp->nfs_fh4_len);
2025
2026 /* initialize fsid and supp_attrs for server fs */
2027 svp->sv_fsid = garp->n4g_fsid;
2028 svp->sv_supp_attrs =
2029 garp->n4g_ext_res->n4g_suppattrs | FATTR4_MANDATTR_MASK;
2030
2031 nfs_rw_exit(&svp->sv_lock);
2032 nfs4args_lookup_free(argop, num_argops);
2033 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
2034 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2035 if (!recovery)
2036 nfs4_end_fop(mi, NULL, NULL, OH_MOUNT, &recov_state, needrecov);
2037 }
2038
2039 /*
2040 * Save a copy of the servinfo4_t structure.
2041 * We might need it to replace the servinfo4 struct and try again
2042 * if getting the file handle fails in the case of a referral.
2043 */
2044 static struct servinfo4 *
2045 copy_svp(servinfo4_t *nsvp)
2046 {
2047 servinfo4_t *svp = NULL;
2048 struct knetconfig *sknconf, *tknconf;
2049 struct netbuf *saddr, *taddr;
2050
2051 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2052 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2053 svp->sv_flags = nsvp->sv_flags;
2054 svp->sv_fsid = nsvp->sv_fsid;
2055 svp->sv_hostnamelen = nsvp->sv_hostnamelen;
2056 svp->sv_pathlen = nsvp->sv_pathlen;
2057 svp->sv_supp_attrs = nsvp->sv_supp_attrs;
2058
2059 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
2060 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
2061 bcopy(nsvp->sv_hostname, svp->sv_hostname, svp->sv_hostnamelen);
2062 bcopy(nsvp->sv_path, svp->sv_path, svp->sv_pathlen);
2063
2064 saddr = &nsvp->sv_addr;
2065 taddr = &svp->sv_addr;
2066 taddr->maxlen = saddr->maxlen;
2067 taddr->len = saddr->len;
2068 if (saddr->len > 0) {
2069 taddr->buf = kmem_zalloc(saddr->maxlen, KM_SLEEP);
2070 bcopy(saddr->buf, taddr->buf, saddr->len);
2071 }
2072
2073 svp->sv_knconf = kmem_zalloc(sizeof (struct knetconfig), KM_SLEEP);
2074 sknconf = nsvp->sv_knconf;
2075 tknconf = svp->sv_knconf;
2076 tknconf->knc_semantics = sknconf->knc_semantics;
2077 tknconf->knc_rdev = sknconf->knc_rdev;
2078 if (sknconf->knc_proto != NULL) {
2079 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2080 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2081 KNC_STRSIZE);
2082 }
2083 if (sknconf->knc_protofmly != NULL) {
2084 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2085 bcopy(sknconf->knc_protofmly, (char *)tknconf->knc_protofmly,
2086 KNC_STRSIZE);
2087 }
2088
2089 if (nsvp->sv_origknconf != NULL) {
2090 svp->sv_origknconf = kmem_zalloc(sizeof (struct knetconfig),
2091 KM_SLEEP);
2092 sknconf = nsvp->sv_origknconf;
2093 tknconf = svp->sv_origknconf;
2094 tknconf->knc_semantics = sknconf->knc_semantics;
2095 tknconf->knc_rdev = sknconf->knc_rdev;
2096 if (sknconf->knc_proto != NULL) {
2097 tknconf->knc_proto = kmem_zalloc(KNC_STRSIZE, KM_SLEEP);
2098 bcopy(sknconf->knc_proto, (char *)tknconf->knc_proto,
2099 KNC_STRSIZE);
2100 }
2101 if (sknconf->knc_protofmly != NULL) {
2102 tknconf->knc_protofmly = kmem_zalloc(KNC_STRSIZE,
2103 KM_SLEEP);
2104 bcopy(sknconf->knc_protofmly,
2105 (char *)tknconf->knc_protofmly, KNC_STRSIZE);
2106 }
2107 }
2108
2109 svp->sv_secdata = copy_sec_data(nsvp->sv_secdata);
2110 svp->sv_dhsec = copy_sec_data(nsvp->sv_dhsec);
2111 /*
2112 * The rest of the security information is not copied; it is rebuilt
2113 * from the information available in secdata and dhsec.
2114 */
2115 svp->sv_next = NULL;
2116
2117 return (svp);
2118 }
2119
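/*
 * Restore the servinfo4 for "mi" after a failed referral or symlink
 * lookup.  If the hostname changed (referral case), replace the whole
 * servinfo4_t with a copy of the saved original; if only the path
 * length changed (symlink case), restore just the original path.
 */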
2120 servinfo4_t *
2121 restore_svp(mntinfo4_t *mi, servinfo4_t *svp, servinfo4_t *origsvp)
2122 {
2123 servinfo4_t *srvnext, *tmpsrv;
2124
2125 if (strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) {
2126 /*
2127 * Since the hostname changed, we must be dealing
2128 * with a referral, and the lookup failed. We will
2129 * restore the whole servinfo4_t to what it was before.
2130 */
2131 srvnext = svp->sv_next;
2132 svp->sv_next = NULL;
2133 tmpsrv = copy_svp(origsvp);
2134 sv4_free(svp);
2135 svp = tmpsrv;
2136 svp->sv_next = srvnext;
2137 mutex_enter(&mi->mi_lock);
2138 mi->mi_servers = svp;
2139 mi->mi_curr_serv = svp;
2140 mutex_exit(&mi->mi_lock);
2141
2142 } else if (origsvp->sv_pathlen != svp->sv_pathlen) {
2143
2144 /*
2145 * For the symlink case: restore the original path because
2146 * it might have contained symlinks that were
2147 * expanded by nfs4getfh_otw before the failure occurred.
2148 */
2149 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2150 kmem_free(svp->sv_path, svp->sv_pathlen);
2151 svp->sv_path =
2152 kmem_alloc(origsvp->sv_pathlen, KM_SLEEP);
2153 svp->sv_pathlen = origsvp->sv_pathlen;
2154 bcopy(origsvp->sv_path, svp->sv_path,
2155 origsvp->sv_pathlen);
2156 nfs_rw_exit(&svp->sv_lock);
2157 }
2158 return (svp);
2159 }
2160
2161 static ushort_t nfs4_max_threads = 8; /* max number of active async threads */
2162 uint_t nfs4_bsize = 32 * 1024; /* client `block' size */
2163 static uint_t nfs4_async_clusters = 1; /* # of reqs from each async queue */
2164 static uint_t nfs4_cots_timeo = NFS_COTS_TIMEO;
2165
2166 /*
2167 * Remap the root filehandle for the given filesystem.
2168 *
2169 * Results are returned via the nfs4_error_t parameter.
2170 */
2171 void
2172 nfs4_remap_root(mntinfo4_t *mi, nfs4_error_t *ep, int flags)
2173 {
2174 struct servinfo4 *svp, *origsvp;
2175 vtype_t vtype;
2176 nfs_fh4 rootfh;
2177 int getfh_flags;
2178 int num_retry;
2179
2180 mutex_enter(&mi->mi_lock);
2181
2182 remap_retry:
2183 svp = mi->mi_curr_serv;
2184 getfh_flags =
2185 (flags & NFS4_REMAP_NEEDSOP) ? NFS4_GETFH_NEEDSOP : 0;
2186 getfh_flags |=
2187 (mi->mi_flags & MI4_PUBLIC) ? NFS4_GETFH_PUBLIC : 0;
2188 mutex_exit(&mi->mi_lock);
2189
2190 /*
2191 * Just in case the server path being mounted contains
2192 * symlinks and fails w/STALE, save the initial sv_path
2193 * so we can redrive the initial mount compound with the
2194 * initial sv_path -- not a symlink-expanded version.
2195 *
2196 * This could only happen if a symlink was expanded
2197 * and the expanded mount compound failed with STALE.
2198 * Because the symlink could have been removed at the
2199 * server (and replaced with another symlink or directory),
2200 * we need to use the initial sv_path when attempting
2201 * to re-lookup everything and recover.
2202 */
2203 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2204 origsvp = copy_svp(svp);
2205 nfs_rw_exit(&svp->sv_lock);
2206
2207 num_retry = nfs4_max_mount_retry;
2208
2209 do {
2210 /*
2211 * Get the root fh from the server. Retry nfs4_max_mount_retry
2212 * (2) times if it fails with STALE since the recovery
2213 * infrastructure doesn't do STALE recovery for components
2214 * of the server path to the object being mounted.
2215 */
2216 nfs4getfh_otw(mi, svp, &vtype, getfh_flags, CRED(), ep);
2217
2218 if (ep->error == 0 && ep->stat == NFS4_OK)
2219 break;
2220
2221 /*
2222 * For some reason, the mount compound failed. Before
2223 * retrying, we need to restore original conditions.
2224 */
2225 svp = restore_svp(mi, svp, origsvp);
2226
2227 } while (num_retry-- > 0);
2228
2229 sv4_free(origsvp);
2230
2231 if (ep->error != 0 || ep->stat != 0) {
2232 return;
2233 }
2234
2235 if (vtype != VNON && vtype != mi->mi_type) {
2236 /* shouldn't happen */
2237 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2238 "nfs4_remap_root: server root vnode type (%d) doesn't "
2239 "match mount info (%d)", vtype, mi->mi_type);
2240 }
2241
2242 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2243 rootfh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2244 rootfh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2245 nfs_rw_exit(&svp->sv_lock);
2246 sfh4_update(mi->mi_rootfh, &rootfh);
2247
2248 /*
2249 * It's possible that recovery took place on the filesystem
2250 * and the server has been updated between the time we did
2251 * the nfs4getfh_otw and now. Re-drive the otw operation
2252 * to make sure we have a good fh.
2253 */
2254 mutex_enter(&mi->mi_lock);
2255 if (mi->mi_curr_serv != svp)
2256 goto remap_retry;
2257
2258 mutex_exit(&mi->mi_lock);
2259 }
2260
2261 static int
2262 nfs4rootvp(vnode_t **rtvpp, vfs_t *vfsp, struct servinfo4 *svp_head,
2263 int flags, cred_t *cr, zone_t *zone)
2264 {
2265 vnode_t *rtvp = NULL;
2266 mntinfo4_t *mi;
2267 dev_t nfs_dev;
2268 int error = 0;
2269 rnode4_t *rp;
2270 int i, len;
2271 struct vattr va;
2272 vtype_t vtype = VNON;
2273 vtype_t tmp_vtype = VNON;
2274 struct servinfo4 *firstsvp = NULL, *svp = svp_head;
2275 nfs4_oo_hash_bucket_t *bucketp;
2276 nfs_fh4 fh;
2277 char *droptext = "";
2278 struct nfs_stats *nfsstatsp;
2279 nfs4_fname_t *mfname;
2280 nfs4_error_t e;
2281 int num_retry, removed;
2282 cred_t *lcr = NULL, *tcr = cr;
2283 struct servinfo4 *origsvp;
2284 char *resource;
2285
2286 nfsstatsp = zone_getspecific(nfsstat_zone_key, nfs_zone());
2287 ASSERT(nfsstatsp != NULL);
2288
2289 ASSERT(nfs_zone() == zone);
2290 ASSERT(crgetref(cr));
2291
2292 /*
2293 * Create a mount record and link it to the vfs struct.
2294 */
2295 mi = kmem_zalloc(sizeof (*mi), KM_SLEEP);
2296 mutex_init(&mi->mi_lock, NULL, MUTEX_DEFAULT, NULL);
2297 nfs_rw_init(&mi->mi_recovlock, NULL, RW_DEFAULT, NULL);
2298 nfs_rw_init(&mi->mi_rename_lock, NULL, RW_DEFAULT, NULL);
2299 nfs_rw_init(&mi->mi_fh_lock, NULL, RW_DEFAULT, NULL);
2300
2301 if (!(flags & NFSMNT_SOFT))
2302 mi->mi_flags |= MI4_HARD;
2303 if ((flags & NFSMNT_NOPRINT))
2304 mi->mi_flags |= MI4_NOPRINT;
2305 if (flags & NFSMNT_INT)
2306 mi->mi_flags |= MI4_INT;
2307 if (flags & NFSMNT_PUBLIC)
2308 mi->mi_flags |= MI4_PUBLIC;
2309 if (flags & NFSMNT_MIRRORMOUNT)
2310 mi->mi_flags |= MI4_MIRRORMOUNT;
2311 if (flags & NFSMNT_REFERRAL)
2312 mi->mi_flags |= MI4_REFERRAL;
2313 mi->mi_retrans = NFS_RETRIES;
2314 if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
2315 svp->sv_knconf->knc_semantics == NC_TPI_COTS)
2316 mi->mi_timeo = nfs4_cots_timeo;
2317 else
2318 mi->mi_timeo = NFS_TIMEO;
2319 mi->mi_prog = NFS_PROGRAM;
2320 mi->mi_vers = NFS_V4;
2321 mi->mi_rfsnames = rfsnames_v4;
2322 mi->mi_reqs = nfsstatsp->nfs_stats_v4.rfsreqcnt_ptr;
2323 cv_init(&mi->mi_failover_cv, NULL, CV_DEFAULT, NULL);
2324 mi->mi_servers = svp;
2325 mi->mi_curr_serv = svp;
2326 mi->mi_acregmin = SEC2HR(ACREGMIN);
2327 mi->mi_acregmax = SEC2HR(ACREGMAX);
2328 mi->mi_acdirmin = SEC2HR(ACDIRMIN);
2329 mi->mi_acdirmax = SEC2HR(ACDIRMAX);
2330 mi->mi_fh_expire_type = FH4_PERSISTENT;
2331 mi->mi_clientid_next = NULL;
2332 mi->mi_clientid_prev = NULL;
2333 mi->mi_srv = NULL;
2334 mi->mi_grace_wait = 0;
2335 mi->mi_error = 0;
2336 mi->mi_srvsettime = 0;
2337 mi->mi_srvset_cnt = 0;
2338
2339 mi->mi_count = 1;
2340
2341 mi->mi_tsize = nfs4_tsize(svp->sv_knconf);
2342 mi->mi_stsize = mi->mi_tsize;
2343
2344 if (flags & NFSMNT_DIRECTIO)
2345 mi->mi_flags |= MI4_DIRECTIO;
2346
2347 mi->mi_flags |= MI4_MOUNTING;
2348
2349 /*
2350 * Make a vfs struct for nfs. We do this here instead of below
2351 * because rtvp needs a vfs before we can do a getattr on it.
2352 *
2353 * Assign a unique device id to the mount
2354 */
2355 mutex_enter(&nfs_minor_lock);
2356 do {
2357 nfs_minor = (nfs_minor + 1) & MAXMIN32;
2358 nfs_dev = makedevice(nfs_major, nfs_minor);
2359 } while (vfs_devismounted(nfs_dev));
2360 mutex_exit(&nfs_minor_lock);
2361
2362 vfsp->vfs_dev = nfs_dev;
2363 vfs_make_fsid(&vfsp->vfs_fsid, nfs_dev, nfs4fstyp);
2364 vfsp->vfs_data = (caddr_t)mi;
2365 vfsp->vfs_fstype = nfsfstyp;
2366 vfsp->vfs_bsize = nfs4_bsize;
2367
2368 /*
2369 * Initialize fields used to support async putpage operations.
2370 */
2371 for (i = 0; i < NFS4_ASYNC_TYPES; i++)
2372 mi->mi_async_clusters[i] = nfs4_async_clusters;
2373 mi->mi_async_init_clusters = nfs4_async_clusters;
2374 mi->mi_async_curr[NFS4_ASYNC_QUEUE] =
2375 mi->mi_async_curr[NFS4_ASYNC_PGOPS_QUEUE] = &mi->mi_async_reqs[0];
2376 mi->mi_max_threads = nfs4_max_threads;
2377 mutex_init(&mi->mi_async_lock, NULL, MUTEX_DEFAULT, NULL);
2378 cv_init(&mi->mi_async_reqs_cv, NULL, CV_DEFAULT, NULL);
2379 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE], NULL, CV_DEFAULT,
2380 NULL);
2381 cv_init(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE], NULL,
2382 CV_DEFAULT, NULL);
2383 cv_init(&mi->mi_async_cv, NULL, CV_DEFAULT, NULL);
2384 cv_init(&mi->mi_inact_req_cv, NULL, CV_DEFAULT, NULL);
2385
2386 mi->mi_vfsp = vfsp;
2387 mi->mi_zone = zone;
2388 zone_init_ref(&mi->mi_zone_ref);
2389 zone_hold_ref(zone, &mi->mi_zone_ref, ZONE_REF_NFSV4);
2390 nfs4_mi_zonelist_add(mi);
2391
2392 /*
2393 * Initialize the <open owner/cred> hash table.
2394 */
2395 for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
2396 bucketp = &(mi->mi_oo_list[i]);
2397 mutex_init(&bucketp->b_lock, NULL, MUTEX_DEFAULT, NULL);
2398 list_create(&bucketp->b_oo_hash_list,
2399 sizeof (nfs4_open_owner_t),
2400 offsetof(nfs4_open_owner_t, oo_hash_node));
2401 }
2402
2403 /*
2404 * Initialize the freed open owner list.
2405 */
2406 mi->mi_foo_num = 0;
2407 mi->mi_foo_max = NFS4_NUM_FREED_OPEN_OWNERS;
2408 list_create(&mi->mi_foo_list, sizeof (nfs4_open_owner_t),
2409 offsetof(nfs4_open_owner_t, oo_foo_node));
2410
2411 list_create(&mi->mi_lost_state, sizeof (nfs4_lost_rqst_t),
2412 offsetof(nfs4_lost_rqst_t, lr_node));
2413
2414 list_create(&mi->mi_bseqid_list, sizeof (nfs4_bseqid_entry_t),
2415 offsetof(nfs4_bseqid_entry_t, bs_node));
2416
2417 /*
2418 * Initialize the msg buffer.
2419 */
2420 list_create(&mi->mi_msg_list, sizeof (nfs4_debug_msg_t),
2421 offsetof(nfs4_debug_msg_t, msg_node));
2422 mi->mi_msg_count = 0;
2423 mutex_init(&mi->mi_msg_list_lock, NULL, MUTEX_DEFAULT, NULL);
2424
2425 /*
2426 * Initialize kstats
2427 */
2428 nfs4_mnt_kstat_init(vfsp);
2429
2430 /*
2431 * Initialize the shared filehandle pool.
2432 */
2433 sfh4_createtab(&mi->mi_filehandles);
2434
2435 /*
2436 * Save server path we're attempting to mount.
2437 */
2438 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2439 origsvp = copy_svp(svp);
2440 nfs_rw_exit(&svp->sv_lock);
2441
2442 /*
2443 * Make the GETFH call to get root fh for each replica.
2444 */
2445 if (svp_head->sv_next)
2446 droptext = ", dropping replica";
2447
2448 /*
2449 * If the uid is set then set the creds for secure mounts
2450 * by proxy processes such as automountd.
2451 */
2452 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2453 if (svp->sv_secdata->uid != 0 &&
2454 svp->sv_secdata->rpcflavor == RPCSEC_GSS) {
2455 lcr = crdup(cr);
2456 (void) crsetugid(lcr, svp->sv_secdata->uid, crgetgid(cr));
2457 tcr = lcr;
2458 }
2459 nfs_rw_exit(&svp->sv_lock);
2460 for (svp = svp_head; svp; svp = svp->sv_next) {
2461 if (nfs4_chkdup_servinfo4(svp_head, svp)) {
2462 nfs_cmn_err(error, CE_WARN,
2463 VERS_MSG "Host %s is a duplicate%s",
2464 svp->sv_hostname, droptext);
2465 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2466 svp->sv_flags |= SV4_NOTINUSE;
2467 nfs_rw_exit(&svp->sv_lock);
2468 continue;
2469 }
2470 mi->mi_curr_serv = svp;
2471
2472 /*
2473 * Just in case the server path being mounted contains
2474 * symlinks and fails w/STALE, save the initial sv_path
2475 * so we can redrive the initial mount compound with the
2476 * initial sv_path -- not a symlink-expanded version.
2477 *
2478 * This could only happen if a symlink was expanded
2479 * and the expanded mount compound failed with STALE.
2480 * Because the symlink could have been removed at the
2481 * server (and replaced with another symlink or directory),
2482 * we need to use the initial sv_path when attempting
2483 * to re-lookup everything and recover.
2484 *
2485 * Other mount errors should eventually be handled here also
2486 * (NFS4ERR_DELAY, NFS4ERR_RESOURCE). For now, all mount
2487 * failures will result in the mount being redriven a few times.
2488 */
2489 num_retry = nfs4_max_mount_retry;
2490 do {
2491 nfs4getfh_otw(mi, svp, &tmp_vtype,
2492 ((flags & NFSMNT_PUBLIC) ? NFS4_GETFH_PUBLIC : 0) |
2493 NFS4_GETFH_NEEDSOP, tcr, &e);
2494
2495 if (e.error == 0 && e.stat == NFS4_OK)
2496 break;
2497
2498 /*
2499 * For some reason, the mount compound failed. Before
2500 * retrying, we need to restore original conditions.
2501 */
2502 svp = restore_svp(mi, svp, origsvp);
2503 svp_head = svp;
2504
2505 } while (num_retry-- > 0);
2506 error = e.error ? e.error : geterrno4(e.stat);
2507 if (error) {
2508 nfs_cmn_err(error, CE_WARN,
2509 VERS_MSG "initial call to %s failed%s: %m",
2510 svp->sv_hostname, droptext);
2511 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2512 svp->sv_flags |= SV4_NOTINUSE;
2513 nfs_rw_exit(&svp->sv_lock);
2514 mi->mi_flags &= ~MI4_RECOV_FAIL;
2515 mi->mi_error = 0;
2516 continue;
2517 }
2518
2519 if (tmp_vtype == VBAD) {
2520 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2521 VERS_MSG "%s returned a bad file type for "
2522 "root%s", svp->sv_hostname, droptext);
2523 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2524 svp->sv_flags |= SV4_NOTINUSE;
2525 nfs_rw_exit(&svp->sv_lock);
2526 continue;
2527 }
2528
2529 if (vtype == VNON) {
2530 vtype = tmp_vtype;
2531 } else if (vtype != tmp_vtype) {
2532 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
2533 VERS_MSG "%s returned a different file type "
2534 "for root%s", svp->sv_hostname, droptext);
2535 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2536 svp->sv_flags |= SV4_NOTINUSE;
2537 nfs_rw_exit(&svp->sv_lock);
2538 continue;
2539 }
2540 if (firstsvp == NULL)
2541 firstsvp = svp;
2542 }
2543
2544 if (firstsvp == NULL) {
2545 if (error == 0)
2546 error = ENOENT;
2547 goto bad;
2548 }
2549
2550 mi->mi_curr_serv = svp = firstsvp;
2551 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2552 ASSERT((mi->mi_curr_serv->sv_flags & SV4_NOTINUSE) == 0);
2553 fh.nfs_fh4_len = svp->sv_fhandle.fh_len;
2554 fh.nfs_fh4_val = svp->sv_fhandle.fh_buf;
2555 mi->mi_rootfh = sfh4_get(&fh, mi);
2556 fh.nfs_fh4_len = svp->sv_pfhandle.fh_len;
2557 fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf;
2558 mi->mi_srvparentfh = sfh4_get(&fh, mi);
2559 nfs_rw_exit(&svp->sv_lock);
2560
2561 /*
2562 * Get the fname for filesystem root.
2563 */
2564 mi->mi_fname = fn_get(NULL, ".", mi->mi_rootfh);
2565 mfname = mi->mi_fname;
2566 fn_hold(mfname);
2567
2568 /*
2569 * Make the root vnode without attributes.
2570 */
2571 rtvp = makenfs4node_by_fh(mi->mi_rootfh, NULL,
2572 &mfname, NULL, mi, cr, gethrtime());
2573 rtvp->v_type = vtype;
2574
2575 mi->mi_curread = mi->mi_tsize;
2576 mi->mi_curwrite = mi->mi_stsize;
2577
2578 /*
2579 * Start the manager thread responsible for handling async worker
2580 * threads.
2581 */
2582 MI4_HOLD(mi);
2583 VFS_HOLD(vfsp); /* add reference for thread */
2584 mi->mi_manager_thread = zthread_create(NULL, 0, nfs4_async_manager,
2585 vfsp, 0, minclsyspri);
2586 ASSERT(mi->mi_manager_thread != NULL);
2587
2588 /*
2589 * Create the thread that handles over-the-wire calls for
2590 * VOP_INACTIVE.
2591 * This needs to happen after the manager thread is created.
2592 */
2593 MI4_HOLD(mi);
2594 mi->mi_inactive_thread = zthread_create(NULL, 0, nfs4_inactive_thread,
2595 mi, 0, minclsyspri);
2596 ASSERT(mi->mi_inactive_thread != NULL);
2597
2598 /* If we didn't get a type, get one now */
2599 if (rtvp->v_type == VNON) {
2600 va.va_mask = AT_TYPE;
2601 error = nfs4getattr(rtvp, &va, tcr);
2602 if (error)
2603 goto bad;
2604 rtvp->v_type = va.va_type;
2605 }
2606
2607 mi->mi_type = rtvp->v_type;
2608
2609 mutex_enter(&mi->mi_lock);
2610 mi->mi_flags &= ~MI4_MOUNTING;
2611 mutex_exit(&mi->mi_lock);
2612
2613 /* Update VFS with new server and path info */
2614 if ((strcmp(svp->sv_hostname, origsvp->sv_hostname) != 0) ||
2615 (strcmp(svp->sv_path, origsvp->sv_path) != 0)) {
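/*
 * sv_hostnamelen and sv_pathlen each include their terminating
 * NUL, so their sum leaves room for the ':' separator plus the
 * final NUL of the resource string built below.
 */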
2616 len = svp->sv_hostnamelen + svp->sv_pathlen;
2617 resource = kmem_zalloc(len, KM_SLEEP);
2618 (void) strcat(resource, svp->sv_hostname);
2619 (void) strcat(resource, ":");
2620 (void) strcat(resource, svp->sv_path);
2621 vfs_setresource(vfsp, resource, 0);
2622 kmem_free(resource, len);
2623 }
2624
2625 sv4_free(origsvp);
2626 *rtvpp = rtvp;
2627 if (lcr != NULL)
2628 crfree(lcr);
2629
2630 return (0);
2631 bad:
2632 /*
2633 * An error occurred somewhere, need to clean up...
2634 */
2635 if (lcr != NULL)
2636 crfree(lcr);
2637
2638 if (rtvp != NULL) {
2639 /*
2640 * We need to release our reference to the root vnode and
2641 * destroy the mntinfo4 struct that we just created.
2642 */
2643 rp = VTOR4(rtvp);
2644 if (rp->r_flags & R4HASHED)
2645 rp4_rmhash(rp);
2646 VN_RELE(rtvp);
2647 }
2648 nfs4_async_stop(vfsp);
2649 nfs4_async_manager_stop(vfsp);
2650 removed = nfs4_mi_zonelist_remove(mi);
2651 if (removed)
2652 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2653
2654 /*
2655 * This releases the initial "hold" of the mi since it will never
2656 * be referenced by the vfsp. Also, when mount returns to vfs.c
2657 * with an error, the vfsp will be destroyed, not rele'd.
2658 */
2659 MI4_RELE(mi);
2660
2661 if (origsvp != NULL)
2662 sv4_free(origsvp);
2663
2664 *rtvpp = NULL;
2665 return (error);
2666 }
2667
2668 /*
2669 * vfs operations
2670 */
2671 static int
2672 nfs4_unmount(vfs_t *vfsp, int flag, cred_t *cr)
2673 {
2674 mntinfo4_t *mi;
2675 ushort_t omax;
2676 int removed;
2677
2678 bool_t must_unlock;
2679
2680 nfs4_ephemeral_tree_t *eph_tree;
2681
2682 if (secpolicy_fs_unmount(cr, vfsp) != 0)
2683 return (EPERM);
2684
2685 mi = VFTOMI4(vfsp);
2686
2687 if (flag & MS_FORCE) {
2688 vfsp->vfs_flag |= VFS_UNMOUNTED;
2689 if (nfs_zone() != mi->mi_zone) {
2690 /*
2691 * If the request is coming from the wrong zone,
2692 * we don't want to create any new threads, and
2693 * performance is not a concern. Do everything
2694 * inline.
2695 */
2696 NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
2697 "nfs4_unmount x-zone forced unmount of vfs %p\n",
2698 (void *)vfsp));
2699 nfs4_free_mount(vfsp, flag, cr);
2700 } else {
2701 /*
2702 * Free data structures asynchronously, to avoid
2703 * blocking the current thread (for performance
2704 * reasons only).
2705 */
2706 async_free_mount(vfsp, flag, cr);
2707 }
2708
2709 return (0);
2710 }
2711
2712 /*
2713 * Wait until all asynchronous putpage operations on
2714 * this file system are complete before flushing rnodes
2715 * from the cache.
2716 */
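/*
 * Save the current async thread limit; it is restored below on the
 * paths where the unmount cannot proceed.
 */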
2717 omax = mi->mi_max_threads;
2718 if (nfs4_async_stop_sig(vfsp))
2719 return (EINTR);
2720
2721 r4flush(vfsp, cr);
2722
2723 /*
2724 * About the only reason that this would fail would be
2725 * that the harvester is already busy tearing down this
2726 * node. So we fail back to the caller and let them try
2727 * again when needed.
2728 */
2729 if (nfs4_ephemeral_umount(mi, flag, cr,
2730 &must_unlock, &eph_tree)) {
2731 ASSERT(must_unlock == FALSE);
2732 mutex_enter(&mi->mi_async_lock);
2733 mi->mi_max_threads = omax;
2734 mutex_exit(&mi->mi_async_lock);
2735
2736 return (EBUSY);
2737 }
2738
2739 /*
2740 * If there are any active vnodes on this file system,
2741 * then the file system is busy and can't be unmounted.
2742 */
2743 if (check_rtable4(vfsp)) {
2744 nfs4_ephemeral_umount_unlock(&must_unlock, &eph_tree);
2745
2746 mutex_enter(&mi->mi_async_lock);
2747 mi->mi_max_threads = omax;
2748 mutex_exit(&mi->mi_async_lock);
2749
2750 return (EBUSY);
2751 }
2752
2753 /*
2754 * The unmount can't fail from now on, so record any
2755 * ephemeral changes.
2756 */
2757 nfs4_ephemeral_umount_activate(mi, &must_unlock, &eph_tree);
2758
2759 /*
2760 * There are no active files that could require over-the-wire
2761 * calls to the server, so stop the async manager and the
2762 * inactive thread.
2763 */
2764 nfs4_async_manager_stop(vfsp);
2765
2766 /*
2767 * Destroy all rnodes belonging to this file system from the
2768 * rnode hash queues and purge any resources allocated to
2769 * them.
2770 */
2771 destroy_rtable4(vfsp, cr);
2772 vfsp->vfs_flag |= VFS_UNMOUNTED;
2773
2774 nfs4_remove_mi_from_server(mi, NULL);
2775 removed = nfs4_mi_zonelist_remove(mi);
2776 if (removed)
2777 zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2778
2779 return (0);
2780 }
2781
2782 /*
2783 * find root of nfs
2784 */
2785 static int
2786 nfs4_root(vfs_t *vfsp, vnode_t **vpp)
2787 {
2788 mntinfo4_t *mi;
2789 vnode_t *vp;
2790 nfs4_fname_t *mfname;
2791 servinfo4_t *svp;
2792
2793 mi = VFTOMI4(vfsp);
2794
2795 if (nfs_zone() != mi->mi_zone)
2796 return (EPERM);
2797
2798 svp = mi->mi_curr_serv;
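/*
 * If the root filehandle has been marked stale, re-check under the
 * write lock; if the flag is still set, clear it and fail with
 * ENOENT so the caller can recover.
 */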
2799 if (svp) {
2800 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
2801 if (svp->sv_flags & SV4_ROOT_STALE) {
2802 nfs_rw_exit(&svp->sv_lock);
2803
2804 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
2805 if (svp->sv_flags & SV4_ROOT_STALE) {
2806 svp->sv_flags &= ~SV4_ROOT_STALE;
2807 nfs_rw_exit(&svp->sv_lock);
2808 return (ENOENT);
2809 }
2810 nfs_rw_exit(&svp->sv_lock);
2811 } else
2812 nfs_rw_exit(&svp->sv_lock);
2813 }
2814
2815 mfname = mi->mi_fname;
2816 fn_hold(mfname);
2817 vp = makenfs4node_by_fh(mi->mi_rootfh, NULL, &mfname, NULL,
2818 VFTOMI4(vfsp), CRED(), gethrtime());
2819
2820 if (VTOR4(vp)->r_flags & R4STALE) {
2821 VN_RELE(vp);
2822 return (ENOENT);
2823 }
2824
2825 ASSERT(vp->v_type == VNON || vp->v_type == mi->mi_type);
2826
2827 vp->v_type = mi->mi_type;
2828
2829 *vpp = vp;
2830
2831 return (0);
2832 }
2833
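/*
 * Fetch the filesystem attributes needed for statvfs from the server
 * and copy them into *sbp.
 */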
2834 static int
2835 nfs4_statfs_otw(vnode_t *vp, struct statvfs64 *sbp, cred_t *cr)
2836 {
2837 int error;
2838 nfs4_ga_res_t gar;
2839 nfs4_ga_ext_res_t ger;
2840
2841 gar.n4g_ext_res = &ger;
2842
2843 if (error = nfs4_attr_otw(vp, TAG_FSINFO, &gar,
2844 NFS4_STATFS_ATTR_MASK, cr))
2845 return (error);
2846
2847 *sbp = gar.n4g_ext_res->n4g_sb;
2848
2849 return (0);
2850 }
2851
2852 /*
2853 * Get file system statistics.
2854 */
2855 static int
2856 nfs4_statvfs(vfs_t *vfsp, struct statvfs64 *sbp)
2857 {
2858 int error;
2859 vnode_t *vp;
2860 cred_t *cr;
2861
2862 error = nfs4_root(vfsp, &vp);
2863 if (error)
2864 return (error);
2865
2866 cr = CRED();
2867
2868 error = nfs4_statfs_otw(vp, sbp, cr);
2869 if (!error) {
2870 (void) strncpy(sbp->f_basetype,
2871 vfssw[vfsp->vfs_fstype].vsw_name, FSTYPSZ);
2872 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
2873 } else {
2874 nfs4_purge_stale_fh(error, vp, cr);
2875 }
2876
2877 VN_RELE(vp);
2878
2879 return (error);
2880 }
2881
2882 static kmutex_t nfs4_syncbusy;
2883
2884 /*
2885 * Flush dirty nfs files for file system vfsp.
2886 * If vfsp == NULL, all nfs files are flushed.
2887 *
2888 * SYNC_CLOSE in flag is passed to us to
2889 * indicate that we are shutting down and/or
2890 * rebooting.
2891 */
2892 static int
2893 nfs4_sync(vfs_t *vfsp, short flag, cred_t *cr)
2894 {
2895 /*
2896 * Cross-zone calls are OK here, since this translates to a
2897 * VOP_PUTPAGE(B_ASYNC), which gets picked up by the right zone.
2898 */
2899 if (!(flag & SYNC_ATTR) && mutex_tryenter(&nfs4_syncbusy) != 0) {
2900 r4flush(vfsp, cr);
2901 mutex_exit(&nfs4_syncbusy);
2902 }
2903
2904 /*
2905 * If SYNC_CLOSE is set then we know that
2906 * the system is rebooting; mark the mntinfo
2907 * for later examination.
2908 */
2909 if (vfsp && (flag & SYNC_CLOSE)) {
2910 mntinfo4_t *mi;
2911
2912 mi = VFTOMI4(vfsp);
2913 if (!(mi->mi_flags & MI4_SHUTDOWN)) {
2914 mutex_enter(&mi->mi_lock);
2915 mi->mi_flags |= MI4_SHUTDOWN;
2916 mutex_exit(&mi->mi_lock);
2917 }
2918 }
2919 return (0);
2920 }
2921
2922 /*
2923 * vget is difficult, if not impossible, to support in v4 because we don't
2924 * know the parent directory or name, which makes it impossible to create a
2925 * useful shadow vnode. And we need the shadow vnode for things like
2926 * OPEN.
2927 */
2928
2929 /* ARGSUSED */
2930 /*
2931 * XXX Check nfs4_vget_pseudo() for dependency.
2932 */
2933 static int
2934 nfs4_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
2935 {
2936 return (EREMOTE);
2937 }
2938
2939 /*
2940 * nfs4_mountroot gets called in the case where we are diskless booting. All
2941 * we need from here is the ability to get the server info and from there we
2942 * can simply call nfs4rootvp.
2943 */
2944 /* ARGSUSED */
2945 static int
2946 nfs4_mountroot(vfs_t *vfsp, whymountroot_t why)
2947 {
2948 vnode_t *rtvp;
2949 char root_hostname[SYS_NMLN+1];
2950 struct servinfo4 *svp;
2951 int error;
2952 int vfsflags;
2953 size_t size;
2954 char *root_path;
2955 struct pathname pn;
2956 char *name;
2957 cred_t *cr;
2958 mntinfo4_t *mi;
2959 struct nfs_args args; /* nfs mount arguments */
2960 static char token[10];
2961 nfs4_error_t n4e;
2962
2963 bzero(&args, sizeof (args));
2964
2965 /* do this BEFORE getfile which causes xid stamps to be initialized */
2966 clkset(-1L); /* hack for now - until we get time svc? */
2967
2968 if (why == ROOT_REMOUNT) {
2969 /*
2970 * Shouldn't happen.
2971 */
2972 panic("nfs4_mountroot: why == ROOT_REMOUNT");
2973 }
2974
2975 if (why == ROOT_UNMOUNT) {
2976 /*
2977 * Nothing to do for NFS.
2978 */
2979 return (0);
2980 }
2981
2982 /*
2983 * why == ROOT_INIT
2984 */
2985
2986 name = token;
2987 *name = 0;
2988 (void) getfsname("root", name, sizeof (token));
2989
2990 pn_alloc(&pn);
2991 root_path = pn.pn_path;
2992
2993 svp = kmem_zalloc(sizeof (*svp), KM_SLEEP);
2994 nfs_rw_init(&svp->sv_lock, NULL, RW_DEFAULT, NULL);
2995 svp->sv_knconf = kmem_zalloc(sizeof (*svp->sv_knconf), KM_SLEEP);
2996 svp->sv_knconf->knc_protofmly = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
2997 svp->sv_knconf->knc_proto = kmem_alloc(KNC_STRSIZE, KM_SLEEP);
2998
2999 /*
3000 * Get server address
3001 * Get the root path
3002 * Get server's transport
3003 * Get server's hostname
3004 * Get options
3005 */
3006 args.addr = &svp->sv_addr;
3007 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3008 args.fh = (char *)&svp->sv_fhandle;
3009 args.knconf = svp->sv_knconf;
3010 args.hostname = root_hostname;
3011 vfsflags = 0;
3012 if (error = mount_root(*name ? name : "root", root_path, NFS_V4,
3013 &args, &vfsflags)) {
3014 if (error == EPROTONOSUPPORT)
3015 nfs_cmn_err(error, CE_WARN, "nfs4_mountroot: "
3016 "mount_root failed: server doesn't support NFS V4");
3017 else
3018 nfs_cmn_err(error, CE_WARN,
3019 "nfs4_mountroot: mount_root failed: %m");
3020 nfs_rw_exit(&svp->sv_lock);
3021 sv4_free(svp);
3022 pn_free(&pn);
3023 return (error);
3024 }
3025 nfs_rw_exit(&svp->sv_lock);
3026 svp->sv_hostnamelen = (int)(strlen(root_hostname) + 1);
3027 svp->sv_hostname = kmem_alloc(svp->sv_hostnamelen, KM_SLEEP);
3028 (void) strcpy(svp->sv_hostname, root_hostname);
3029
3030 svp->sv_pathlen = (int)(strlen(root_path) + 1);
3031 svp->sv_path = kmem_alloc(svp->sv_pathlen, KM_SLEEP);
3032 (void) strcpy(svp->sv_path, root_path);
3033
3034 /*
3035 * Force root partition to always be mounted with AUTH_UNIX for now
3036 */
3037 svp->sv_secdata = kmem_alloc(sizeof (*svp->sv_secdata), KM_SLEEP);
3038 svp->sv_secdata->secmod = AUTH_UNIX;
3039 svp->sv_secdata->rpcflavor = AUTH_UNIX;
3040 svp->sv_secdata->data = NULL;
3041
3042 cr = crgetcred();
3043 rtvp = NULL;
3044
3045 error = nfs4rootvp(&rtvp, vfsp, svp, args.flags, cr, global_zone);
3046
3047 if (error) {
3048 crfree(cr);
3049 pn_free(&pn);
3050 sv4_free(svp);
3051 return (error);
3052 }
3053
3054 mi = VTOMI4(rtvp);
3055
3056 /*
3057 * Send client id to the server, if necessary
3058 */
3059 nfs4_error_zinit(&n4e);
3060 nfs4setclientid(mi, cr, FALSE, &n4e);
3061 error = n4e.error;
3062
3063 crfree(cr);
3064
3065 if (error) {
3066 pn_free(&pn);
3067 goto errout;
3068 }
3069
3070 error = nfs4_setopts(rtvp, DATAMODEL_NATIVE, &args);
3071 if (error) {
3072 nfs_cmn_err(error, CE_WARN,
3073 "nfs4_mountroot: invalid root mount options");
3074 pn_free(&pn);
3075 goto errout;
3076 }
3077
3078 (void) vfs_lock_wait(vfsp);
3079 vfs_add(NULL, vfsp, vfsflags);
3080 vfs_unlock(vfsp);
3081
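/* Record the boot resource name as "host:path" in rootfs.bo_name. */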
3082 size = strlen(svp->sv_hostname);
3083 (void) strcpy(rootfs.bo_name, svp->sv_hostname);
3084 rootfs.bo_name[size] = ':';
3085 (void) strcpy(&rootfs.bo_name[size + 1], root_path);
3086
3087 pn_free(&pn);
3088
3089 errout:
3090 if (error) {
3091 sv4_free(svp);
3092 nfs4_async_stop(vfsp);
3093 nfs4_async_manager_stop(vfsp);
3094 }
3095
3096 if (rtvp != NULL)
3097 VN_RELE(rtvp);
3098
3099 return (error);
3100 }
3101
3102 /*
3103 * Initialization routine for VFS routines. Should only be called once.
3104 */
3105 int
3106 nfs4_vfsinit(void)
3107 {
3108 mutex_init(&nfs4_syncbusy, NULL, MUTEX_DEFAULT, NULL);
3109 nfs4setclientid_init();
3110 nfs4_ephemeral_init();
3111 return (0);
3112 }
3113
3114 void
3115 nfs4_vfsfini(void)
3116 {
3117 nfs4_ephemeral_fini();
3118 nfs4setclientid_fini();
3119 mutex_destroy(&nfs4_syncbusy);
3120 }
3121
3122 void
3123 nfs4_freevfs(vfs_t *vfsp)
3124 {
3125 mntinfo4_t *mi;
3126
3127 /* need to release the initial hold */
3128 mi = VFTOMI4(vfsp);
3129
3130 /*
3131 * At this point, we can no longer reference the vfs
3132 * and need to inform other holders of the reference
3133 * to the mntinfo4_t.
3134 */
3135 mi->mi_vfsp = NULL;
3136
3137 MI4_RELE(mi);
3138 }
3139
3140 /*
3141 * Client side SETCLIENTID and SETCLIENTID_CONFIRM
3142 */
3143 struct nfs4_server nfs4_server_lst =
3144 { &nfs4_server_lst, &nfs4_server_lst };
3145
3146 kmutex_t nfs4_server_lst_lock;
3147
3148 static void
3149 nfs4setclientid_init(void)
3150 {
3151 mutex_init(&nfs4_server_lst_lock, NULL, MUTEX_DEFAULT, NULL);
3152 }
3153
3154 static void
3155 nfs4setclientid_fini(void)
3156 {
3157 mutex_destroy(&nfs4_server_lst_lock);
3158 }
3159
3160 int nfs4_retry_sclid_delay = NFS4_RETRY_SCLID_DELAY;
3161 int nfs4_num_sclid_retries = NFS4_NUM_SCLID_RETRIES;
3162
3163 /*
3164 * Set the clientid for the server for "mi". No-op if the clientid is
3165 * already set.
3166 *
3167 * The recovery boolean should be set to TRUE if this function was called
3168 * by the recovery code, and FALSE otherwise. This is used to determine
3169 * if we need to call nfs4_start/end_op as well as grab the mi_recovlock
3170 * for adding a mntinfo4_t to a nfs4_server_t.
3171 *
3172 * Error is returned via 'n4ep'. If there was a 'n4ep->stat' error, then
3173 * 'n4ep->error' is set to geterrno4(n4ep->stat).
3174 */
3175 void
3176 nfs4setclientid(mntinfo4_t *mi, cred_t *cr, bool_t recovery, nfs4_error_t *n4ep)
3177 {
3178 struct nfs4_server *np;
3179 struct servinfo4 *svp = mi->mi_curr_serv;
3180 nfs4_recov_state_t recov_state;
3181 int num_retries = 0;
3182 bool_t retry;
3183 cred_t *lcr = NULL;
3184 int retry_inuse = 1; /* only retry once on NFS4ERR_CLID_INUSE */
3185 time_t lease_time = 0;
3186
3187 recov_state.rs_flags = 0;
3188 recov_state.rs_num_retry_despite_err = 0;
3189 ASSERT(n4ep != NULL);
3190
3191 recov_retry:
3192 retry = FALSE;
3193 nfs4_error_zinit(n4ep);
3194 if (!recovery)
3195 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3196
3197 mutex_enter(&nfs4_server_lst_lock);
3198 np = servinfo4_to_nfs4_server(svp); /* This locks np if it is found */
3199 mutex_exit(&nfs4_server_lst_lock);
3200 if (!np) {
3201 struct nfs4_server *tnp;
3202 np = new_nfs4_server(svp, cr);
3203 mutex_enter(&np->s_lock);
3204
3205 mutex_enter(&nfs4_server_lst_lock);
3206 tnp = servinfo4_to_nfs4_server(svp);
3207 if (tnp) {
3208 /*
3209 * Another thread snuck in and put the server on the list.
3210 * Since we aren't adding it to nfs4_server_lst, we need
3211 * to set the ref count to 0 and destroy it.
3212 */
3213 np->s_refcnt = 0;
3214 destroy_nfs4_server(np);
3215 np = tnp;
3216 } else {
3217 /*
3218 * do not give list a reference until everything
3219 * succeeds
3220 */
3221 insque(np, &nfs4_server_lst);
3222 }
3223 mutex_exit(&nfs4_server_lst_lock);
3224 }
3225 ASSERT(MUTEX_HELD(&np->s_lock));
3226 /*
3227 * If we find the server already has N4S_CLIENTID_SET, then
3228 * just return; we've already done SETCLIENTID to that server.
3229 */
3230 if (np->s_flags & N4S_CLIENTID_SET) {
3231 /* add mi to np's mntinfo4_list */
3232 nfs4_add_mi_to_server(np, mi);
3233 if (!recovery)
3234 nfs_rw_exit(&mi->mi_recovlock);
3235 mutex_exit(&np->s_lock);
3236 nfs4_server_rele(np);
3237 return;
3238 }
3239 mutex_exit(&np->s_lock);
3240
3241
3242 /*
3243 * Drop the mi_recovlock since nfs4_start_op will
3244 * acquire it again for us.
3245 */
3246 if (!recovery) {
3247 nfs_rw_exit(&mi->mi_recovlock);
3248
3249 n4ep->error = nfs4_start_op(mi, NULL, NULL, &recov_state);
3250 if (n4ep->error) {
3251 nfs4_server_rele(np);
3252 return;
3253 }
3254 }
3255
3256 mutex_enter(&np->s_lock);
3257 while (np->s_flags & N4S_CLIENTID_PEND) {
3258 if (!cv_wait_sig(&np->s_clientid_pend, &np->s_lock)) {
3259 mutex_exit(&np->s_lock);
3260 nfs4_server_rele(np);
3261 if (!recovery)
3262 nfs4_end_op(mi, NULL, NULL, &recov_state,
3263 recovery);
3264 n4ep->error = EINTR;
3265 return;
3266 }
3267 }
3268
3269 if (np->s_flags & N4S_CLIENTID_SET) {
3270 /* XXX copied/pasted from above */
3271 /* add mi to np's mntinfo4_list */
3272 nfs4_add_mi_to_server(np, mi);
3273 mutex_exit(&np->s_lock);
3274 nfs4_server_rele(np);
3275 if (!recovery)
3276 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3277 return;
3278 }
3279
3280 /*
3281 * Reset the N4S_CB_PINGED flag. This is used to
3282 * indicate if we have received a CB_NULL from the
3283 * server. Also we reset the waiter flag.
3284 */
3285 np->s_flags &= ~(N4S_CB_PINGED | N4S_CB_WAITER);
3286 /* any failure must now clear this flag */
3287 np->s_flags |= N4S_CLIENTID_PEND;
3288 mutex_exit(&np->s_lock);
3289 nfs4setclientid_otw(mi, svp, cr, np, n4ep, &retry_inuse);
3290
3291 if (n4ep->error == EACCES) {
3292 /*
3293 * If the uid is set then set the creds for secure mounts
3294 * by proxy processes such as automountd.
3295 */
3296 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0);
3297 if (svp->sv_secdata->uid != 0) {
3298 lcr = crdup(cr);
3299 (void) crsetugid(lcr, svp->sv_secdata->uid,
3300 crgetgid(cr));
3301 }
3302 nfs_rw_exit(&svp->sv_lock);
3303
3304 if (lcr != NULL) {
3305 mutex_enter(&np->s_lock);
3306 crfree(np->s_cred);
3307 np->s_cred = lcr;
3308 mutex_exit(&np->s_lock);
3309 nfs4setclientid_otw(mi, svp, lcr, np, n4ep,
3310 &retry_inuse);
3311 }
3312 }
3313 mutex_enter(&np->s_lock);
3314 lease_time = np->s_lease_time;
3315 np->s_flags &= ~N4S_CLIENTID_PEND;
3316 mutex_exit(&np->s_lock);
3317
3318 if (n4ep->error != 0 || n4ep->stat != NFS4_OK) {
3319 /*
3320 * Start recovery if failover is a possibility. If
3321 * invoked by the recovery thread itself, then just
3322 * return and let it handle the failover first. NB:
3323 * recovery is not allowed if the mount is in progress
3324 * since the infrastructure is not sufficiently setup
3325 * to allow it. Just return the error (after suitable
3326 * retries).
3327 */
3328 if (FAILOVER_MOUNT4(mi) && nfs4_try_failover(n4ep)) {
3329 (void) nfs4_start_recovery(n4ep, mi, NULL,
3330 NULL, NULL, NULL, OP_SETCLIENTID, NULL, NULL, NULL);
3331 /*
3332 * Don't retry here, just return and let
3333 * recovery take over.
3334 */
3335 if (recovery)
3336 retry = FALSE;
3337 } else if (nfs4_rpc_retry_error(n4ep->error) ||
3338 n4ep->stat == NFS4ERR_RESOURCE ||
3339 n4ep->stat == NFS4ERR_STALE_CLIENTID) {
3340
3341 retry = TRUE;
3342 /*
3343 * Always retry if we are in recovery, or if we once
3344 * had contact with the server (but it is now
3345 * overloaded).
3346 */
3347 if (recovery == TRUE ||
3348 n4ep->error == ETIMEDOUT ||
3349 n4ep->error == ECONNRESET)
3350 num_retries = 0;
3351 } else if (retry_inuse && n4ep->error == 0 &&
3352 n4ep->stat == NFS4ERR_CLID_INUSE) {
3353 retry = TRUE;
3354 num_retries = 0;
3355 }
3356 } else {
3357 /*
3358 * Since everything succeeded, give the list a reference count
3359 * now, unless it has already been given one by
3360 * add_new_nfs4_server() -- the recovery case, in which the
3361 * nfs4_server is already on the list.
3362 */
3363 mutex_enter(&np->s_lock);
3364 if ((np->s_flags & N4S_INSERTED) == 0) {
3365 np->s_refcnt++;
3366 np->s_flags |= N4S_INSERTED;
3367 }
3368 mutex_exit(&np->s_lock);
3369 }
3370
3371 if (!recovery)
3372 nfs4_end_op(mi, NULL, NULL, &recov_state, recovery);
3373
3374
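/*
 * On the first retry also wait out the reported lease time (this
 * covers NFS4ERR_CLID_INUSE); subsequent retries only wait
 * nfs4_retry_sclid_delay.
 */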
3375 if (retry && num_retries++ < nfs4_num_sclid_retries) {
3376 if (retry_inuse) {
3377 delay(SEC_TO_TICK(lease_time + nfs4_retry_sclid_delay));
3378 retry_inuse = 0;
3379 } else
3380 delay(SEC_TO_TICK(nfs4_retry_sclid_delay));
3381
3382 nfs4_server_rele(np);
3383 goto recov_retry;
3384 }
3385
3386
3387 if (n4ep->error == 0)
3388 n4ep->error = geterrno4(n4ep->stat);
3389
3390 /* broadcast before release in case no other threads are waiting */
3391 cv_broadcast(&np->s_clientid_pend);
3392 nfs4_server_rele(np);
3393 }
3394
3395 int nfs4setclientid_otw_debug = 0;
3396
3397 /*
3398 * This function handles the recovery of STALE_CLIENTID for SETCLIENTID_CONFIRM,
3399 * but nothing else; the calling function must be designed to handle those
3400 * other errors.
3401 */
3402 static void
3403 nfs4setclientid_otw(mntinfo4_t *mi, struct servinfo4 *svp, cred_t *cr,
3404 struct nfs4_server *np, nfs4_error_t *ep, int *retry_inusep)
3405 {
3406 COMPOUND4args_clnt args;
3407 COMPOUND4res_clnt res;
3408 nfs_argop4 argop[3];
3409 SETCLIENTID4args *s_args;
3410 SETCLIENTID4resok *s_resok;
3411 int doqueue = 1;
3412 nfs4_ga_res_t *garp = NULL;
3413 timespec_t prop_time, after_time;
3414 verifier4 verf;
3415 clientid4 tmp_clientid;
3416
3417 ASSERT(!MUTEX_HELD(&np->s_lock));
3418
3419 args.ctag = TAG_SETCLIENTID;
3420
3421 args.array = argop;
3422 args.array_len = 3;
3423
3424 /* PUTROOTFH */
3425 argop[0].argop = OP_PUTROOTFH;
3426
3427 /* GETATTR */
3428 argop[1].argop = OP_GETATTR;
3429 argop[1].nfs_argop4_u.opgetattr.attr_request = FATTR4_LEASE_TIME_MASK;
3430 argop[1].nfs_argop4_u.opgetattr.mi = mi;
3431
3432 /* SETCLIENTID */
3433 argop[2].argop = OP_SETCLIENTID;
3434
3435 s_args = &argop[2].nfs_argop4_u.opsetclientid;
3436
3437 mutex_enter(&np->s_lock);
3438
3439 s_args->client.verifier = np->clidtosend.verifier;
3440 s_args->client.id_len = np->clidtosend.id_len;
3441 ASSERT(s_args->client.id_len <= NFS4_OPAQUE_LIMIT);
3442 s_args->client.id_val = np->clidtosend.id_val;
3443
3444 /*
3445 * The callback needs to happen on a non-RDMA transport.
3446 * Check if we have saved the original knetconfig;
3447 * if so, use that instead.
3448 */
3449 if (svp->sv_origknconf != NULL)
3450 nfs4_cb_args(np, svp->sv_origknconf, s_args);
3451 else
3452 nfs4_cb_args(np, svp->sv_knconf, s_args);
3453
3454 mutex_exit(&np->s_lock);
3455
3456 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3457
3458 if (ep->error)
3459 return;
3460
3461 /* getattr lease_time res */
3462 if ((res.array_len >= 2) &&
3463 (res.array[1].nfs_resop4_u.opgetattr.status == NFS4_OK)) {
3464 garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
3465
3466 #ifndef _LP64
3467 /*
3468 * The 32 bit client cannot handle a lease time greater than
3469 * (INT32_MAX/1000000). This is due to the use of the
3470 * lease_time in calls to drv_usectohz() in
3471 * nfs4_renew_lease_thread(). The problem is that
3472 * drv_usectohz() takes a time_t (which is just a long = 4
3473 * bytes) as its parameter. The lease_time is multiplied by
3474 * 1000000 to convert seconds to usecs for the parameter. If
3475 * a number bigger than (INT32_MAX/1000000) is used then we
3476 * overflow on the 32bit client.
3477 */
3478 if (garp->n4g_ext_res->n4g_leasetime > (INT32_MAX/1000000)) {
3479 garp->n4g_ext_res->n4g_leasetime = INT32_MAX/1000000;
3480 }
3481 #endif
3482
3483 mutex_enter(&np->s_lock);
3484 np->s_lease_time = garp->n4g_ext_res->n4g_leasetime;
3485
3486 /*
3487 * Keep track of the lease period for the mi's
3488 * mi_msg_list. We need an appropriate time
3489 * bound to associate past facts with a current
3490 * event. The lease period is perfect for this.
3491 */
3492 mutex_enter(&mi->mi_msg_list_lock);
3493 mi->mi_lease_period = np->s_lease_time;
3494 mutex_exit(&mi->mi_msg_list_lock);
3495 mutex_exit(&np->s_lock);
3496 }
3497
3498
3499 if (res.status == NFS4ERR_CLID_INUSE) {
3500 clientaddr4 *clid_inuse;
3501
3502 if (!(*retry_inusep)) {
3503 clid_inuse = &res.array->nfs_resop4_u.
3504 opsetclientid.SETCLIENTID4res_u.client_using;
3505
3506 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3507 "NFS4 mount (SETCLIENTID failed)."
3508 " nfs4_client_id.id is in "
3509 "use already by: r_netid<%s> r_addr<%s>",
3510 clid_inuse->r_netid, clid_inuse->r_addr);
3511 }
3512
3513 /*
3514 * XXX - The client should be more robust in its
3515 * handling of clientid in use errors (regen another
3516 * clientid and try again?)
3517 */
3518 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3519 return;
3520 }
3521
3522 if (res.status) {
3523 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3524 return;
3525 }
3526
3527 s_resok = &res.array[2].nfs_resop4_u.
3528 opsetclientid.SETCLIENTID4res_u.resok4;
3529
3530 tmp_clientid = s_resok->clientid;
3531
3532 verf = s_resok->setclientid_confirm;
3533
3534 #ifdef DEBUG
3535 if (nfs4setclientid_otw_debug) {
3536 union {
3537 clientid4 clientid;
3538 int foo[2];
3539 } cid;
3540
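/* Split the 64-bit clientid into two 32-bit words for printing. */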
3541 cid.clientid = s_resok->clientid;
3542
3543 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3544 "nfs4setclientid_otw: OK, clientid = %x,%x, "
3545 "verifier = %" PRIx64 "\n", cid.foo[0], cid.foo[1], verf);
3546 }
3547 #endif
3548
3549 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3550
3551 /* Confirm the client id and get the lease_time attribute */
3552
3553 args.ctag = TAG_SETCLIENTID_CF;
3554
3555 args.array = argop;
3556 args.array_len = 1;
3557
3558 argop[0].argop = OP_SETCLIENTID_CONFIRM;
3559
3560 argop[0].nfs_argop4_u.opsetclientid_confirm.clientid = tmp_clientid;
3561 argop[0].nfs_argop4_u.opsetclientid_confirm.setclientid_confirm = verf;
3562
3563 /* used to figure out RTT for np */
3564 gethrestime(&prop_time);
3565
3566 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3567 "start time: %ld sec %ld nsec", prop_time.tv_sec,
3568 prop_time.tv_nsec));
3569
3570 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
3571
3572 gethrestime(&after_time);
3573 mutex_enter(&np->s_lock);
3574 np->propagation_delay.tv_sec =
3575 MAX(1, after_time.tv_sec - prop_time.tv_sec);
3576 mutex_exit(&np->s_lock);
3577
3578 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3579 "finish time: %ld sec ", after_time.tv_sec));
3580
3581 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4setclientid_otw: "
3582 "propagation delay set to %ld sec",
3583 np->propagation_delay.tv_sec));
3584
3585 if (ep->error)
3586 return;
3587
3588 if (res.status == NFS4ERR_CLID_INUSE) {
3589 clientaddr4 *clid_inuse;
3590
3591 if (!(*retry_inusep)) {
3592 clid_inuse = &res.array->nfs_resop4_u.
3593 opsetclientid.SETCLIENTID4res_u.client_using;
3594
3595 zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
3596 "SETCLIENTID_CONFIRM failed. "
3597 "nfs4_client_id.id is in use already by: "
3598 "r_netid<%s> r_addr<%s>",
3599 clid_inuse->r_netid, clid_inuse->r_addr);
3600 }
3601
3602 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3603 return;
3604 }
3605
3606 if (res.status) {
3607 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3608 return;
3609 }
3610
3611 mutex_enter(&np->s_lock);
3612 np->clientid = tmp_clientid;
3613 np->s_flags |= N4S_CLIENTID_SET;
3614
3615 /* Add mi to np's mntinfo4 list */
3616 nfs4_add_mi_to_server(np, mi);
3617
3618 if (np->lease_valid == NFS4_LEASE_NOT_STARTED) {
3619 /*
3620 * Start lease management thread.
3621 * Keep trying until we succeed.
3622 */
3623
3624 np->s_refcnt++; /* pass reference to thread */
3625 (void) zthread_create(NULL, 0, nfs4_renew_lease_thread, np, 0,
3626 minclsyspri);
3627 }
3628 mutex_exit(&np->s_lock);
3629
3630 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
3631 }
3632
3633 /*
3634 * Add mi to sp's mntinfo4_list if it isn't already in the list. Makes
3635 * mi's clientid the same as sp's.
3636 * Assumes sp is locked down.
3637 */
3638 void
3639 nfs4_add_mi_to_server(nfs4_server_t *sp, mntinfo4_t *mi)
3640 {
3641 mntinfo4_t *tmi;
3642 int in_list = 0;
3643
3644 ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
3645 nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));
3646 ASSERT(sp != &nfs4_server_lst);
3647 ASSERT(MUTEX_HELD(&sp->s_lock));
3648
3649 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3650 "nfs4_add_mi_to_server: add mi %p to sp %p",
3651 (void*)mi, (void*)sp));
3652
3653 for (tmi = sp->mntinfo4_list;
3654 tmi != NULL;
3655 tmi = tmi->mi_clientid_next) {
3656 if (tmi == mi) {
3657 NFS4_DEBUG(nfs4_client_lease_debug,
3658 (CE_NOTE,
3659 "nfs4_add_mi_to_server: mi in list"));
3660 in_list = 1;
3661 }
3662 }
3663
3664 /*
3665 * First put a hold on the mntinfo4's vfsp so that references via
3666 * mntinfo4_list will be valid.
3667 */
3668 if (!in_list)
3669 VFS_HOLD(mi->mi_vfsp);
3670
3671 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4_add_mi_to_server: "
3672 "hold vfs %p for mi: %p", (void*)mi->mi_vfsp, (void*)mi));
3673
3674 if (!in_list) {
3675 if (sp->mntinfo4_list)
3676 sp->mntinfo4_list->mi_clientid_prev = mi;
3677 mi->mi_clientid_next = sp->mntinfo4_list;
3678 mi->mi_srv = sp;
3679 sp->mntinfo4_list = mi;
3680 mi->mi_srvsettime = gethrestime_sec();
3681 mi->mi_srvset_cnt++;
3682 }
3683
3684 /* set mi's clientid to that of sp's for later matching */
3685 mi->mi_clientid = sp->clientid;
3686
3687 /*
3688 * Update the clientid for any other mi's belonging to sp. This
3689 * must be done here while we hold sp->s_lock, so that
3690 * find_nfs4_server() continues to work.
3691 */
3692
3693 for (tmi = sp->mntinfo4_list;
3694 tmi != NULL;
3695 tmi = tmi->mi_clientid_next) {
3696 if (tmi != mi) {
3697 tmi->mi_clientid = sp->clientid;
3698 }
3699 }
3700 }
3701
3702 /*
3703 * Remove the mi from sp's mntinfo4_list and release its reference.
3704 * Exception: if mi still has open files, flag it for later removal (when
3705 * all the files are closed).
3706 *
3707 * If this is the last mntinfo4 in sp's list then tell the lease renewal
3708 * thread to exit.
3709 */
3710 static void
3711 nfs4_remove_mi_from_server_nolock(mntinfo4_t *mi, nfs4_server_t *sp)
3712 {
3713 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3714 "nfs4_remove_mi_from_server_nolock: remove mi %p from sp %p",
3715 (void*)mi, (void*)sp));
3716
3717 ASSERT(sp != NULL);
3718 ASSERT(MUTEX_HELD(&sp->s_lock));
3719 ASSERT(mi->mi_open_files >= 0);
3720
3721 /*
3722 * First make sure this mntinfo4 can be taken off of the list,
3723 * ie: it doesn't have any open files remaining.
3724 */
3725 if (mi->mi_open_files > 0) {
3726 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3727 "nfs4_remove_mi_from_server_nolock: don't "
3728 "remove mi since it still has files open"));
3729
3730 mutex_enter(&mi->mi_lock);
3731 mi->mi_flags |= MI4_REMOVE_ON_LAST_CLOSE;
3732 mutex_exit(&mi->mi_lock);
3733 return;
3734 }
3735
3736 VFS_HOLD(mi->mi_vfsp);
3737 remove_mi(sp, mi);
3738 VFS_RELE(mi->mi_vfsp);
3739
3740 if (sp->mntinfo4_list == NULL) {
3741 /* last fs unmounted, kill the thread */
3742 NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3743 "nfs4_remove_mi_from_server_nolock: kill the thread"));
3744 nfs4_mark_srv_dead(sp);
3745 }
3746 }
3747
3748 /*
3749 * Remove mi from sp's mntinfo4_list and release the vfs reference.
3750 */
3751 static void
3752 remove_mi(nfs4_server_t *sp, mntinfo4_t *mi)
3753 {
3754 ASSERT(MUTEX_HELD(&sp->s_lock));
3755
3756 /*
3757 * We release a reference, and the caller must still have a
3758 * reference.
3759 */
3760 ASSERT(mi->mi_vfsp->vfs_count >= 2);
3761
3762 if (mi->mi_clientid_prev) {
3763 mi->mi_clientid_prev->mi_clientid_next = mi->mi_clientid_next;
3764 } else {
3765 /* This is the first mi in sp's mntinfo4_list */
3766 /*
3767 * Make sure the first mntinfo4 in the list is the actual
3768 * mntinfo4 passed in.
3769 */
3770 ASSERT(sp->mntinfo4_list == mi);
3771
3772 sp->mntinfo4_list = mi->mi_clientid_next;
3773 }
3774 if (mi->mi_clientid_next)
3775 mi->mi_clientid_next->mi_clientid_prev = mi->mi_clientid_prev;
3776
3777 /* Now mark the mntinfo4's links as being removed */
3778 mi->mi_clientid_prev = mi->mi_clientid_next = NULL;
3779 mi->mi_srv = NULL;
3780 mi->mi_srvset_cnt++;
3781
3782 VFS_RELE(mi->mi_vfsp);
3783 }
3784
3785 /*
3786 * Free all the entries in sp's mntinfo4_list.
3787 */
3788 static void
3789 remove_all_mi(nfs4_server_t *sp)
3790 {
3791 mntinfo4_t *mi;
3792
3793 ASSERT(MUTEX_HELD(&sp->s_lock));
3794
3795 while (sp->mntinfo4_list != NULL) {
3796 mi = sp->mntinfo4_list;
3797 /*
3798 * Grab a reference in case there is only one left (which
3799 * remove_mi() frees).
3800 */
3801 VFS_HOLD(mi->mi_vfsp);
3802 remove_mi(sp, mi);
3803 VFS_RELE(mi->mi_vfsp);
3804 }
3805 }
3806
3807 /*
3808 * Remove the mi from sp's mntinfo4_list as above, and rele the vfs.
3809 *
3810 * This version can be called with a null nfs4_server_t arg,
3811 * and will either find the right one and handle locking, or
3812 * do nothing because the mi wasn't added to an sp's mntinfo4_list.
3813 */
3814 void
3815 nfs4_remove_mi_from_server(mntinfo4_t *mi, nfs4_server_t *esp)
3816 {
3817 nfs4_server_t *sp;
3818
3819 if (esp) {
3820 nfs4_remove_mi_from_server_nolock(mi, esp);
3821 return;
3822 }
3823
3824 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
3825 if (sp = find_nfs4_server_all(mi, 1)) {
3826 nfs4_remove_mi_from_server_nolock(mi, sp);
3827 mutex_exit(&sp->s_lock);
3828 nfs4_server_rele(sp);
3829 }
3830 nfs_rw_exit(&mi->mi_recovlock);
3831 }
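
/*
 * Locking note (a summary of the code above, not additional behavior):
 * when a non-NULL esp is passed in, the caller is expected to already
 * hold esp->s_lock, since we go straight to the _nolock variant, which
 * asserts MUTEX_HELD(&esp->s_lock).  When esp is NULL, mi_recovlock is
 * taken as reader so the mi-to-nfs4_server_t linkage stays stable while
 * we look it up and remove it.
 */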

/*
 * Return TRUE if the given server has any non-unmounted filesystems.
 */

bool_t
nfs4_fs_active(nfs4_server_t *sp)
{
	mntinfo4_t *mi;

	ASSERT(MUTEX_HELD(&sp->s_lock));

	for (mi = sp->mntinfo4_list; mi != NULL; mi = mi->mi_clientid_next) {
		if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
			return (TRUE);
	}

	return (FALSE);
}

/*
 * Mark sp as finished and notify any waiters.
 */

void
nfs4_mark_srv_dead(nfs4_server_t *sp)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));

	sp->s_thread_exit = NFS4_THREAD_EXIT;
	cv_broadcast(&sp->cv_thread_exit);
}
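
/*
 * The s_thread_exit/cv_thread_exit pair set above is presumably consumed
 * by the per-server lease renewal thread, which wakes up, notices
 * NFS4_THREAD_EXIT and tears itself down; the "kill the thread" path in
 * nfs4_remove_mi_from_server_nolock() relies on that.  (The renew thread
 * itself lives elsewhere and is not shown in this file section.)
 */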

/*
 * Create a new nfs4_server_t structure.
 * Returns new node unlocked and not in list, but with a reference count of
 * 1.
 */
struct nfs4_server *
new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	struct nfs4_server *np;
	timespec_t tt;
	union {
		struct {
			uint32_t sec;
			uint32_t subsec;
		} un_curtime;
		verifier4 un_verifier;
	} nfs4clientid_verifier;
	/*
	 * We change this ID string carefully and with the Solaris
	 * NFS server behaviour in mind.  "+referrals" indicates
	 * a client that can handle an NFSv4 referral.
	 */
	char id_val[] = "Solaris: %s, NFSv4 kernel client +referrals";
	int len;

	np = kmem_zalloc(sizeof (struct nfs4_server), KM_SLEEP);
	np->saddr.len = svp->sv_addr.len;
	np->saddr.maxlen = svp->sv_addr.maxlen;
	np->saddr.buf = kmem_alloc(svp->sv_addr.maxlen, KM_SLEEP);
	bcopy(svp->sv_addr.buf, np->saddr.buf, svp->sv_addr.len);
	np->s_refcnt = 1;

	/*
	 * Build the nfs_client_id4 for this server mount.  Ensure
	 * the verifier is useful and that the identification is
	 * somehow based on the server's address for the case of
	 * multi-homed servers.
	 */
	nfs4clientid_verifier.un_verifier = 0;
	gethrestime(&tt);
	nfs4clientid_verifier.un_curtime.sec = (uint32_t)tt.tv_sec;
	nfs4clientid_verifier.un_curtime.subsec = (uint32_t)tt.tv_nsec;
	np->clidtosend.verifier = nfs4clientid_verifier.un_verifier;

	/*
	 * Calculate the length of the opaque identifier.  Subtract 2
	 * for the "%s" and add the traditional +1 for null
	 * termination.
	 */
	len = strlen(id_val) - 2 + strlen(uts_nodename()) + 1;
	np->clidtosend.id_len = len + np->saddr.maxlen;

	np->clidtosend.id_val = kmem_alloc(np->clidtosend.id_len, KM_SLEEP);
	(void) sprintf(np->clidtosend.id_val, id_val, uts_nodename());
	bcopy(np->saddr.buf, &np->clidtosend.id_val[len], np->saddr.len);

	np->s_flags = 0;
	np->mntinfo4_list = NULL;
	/* save cred for issuing rfs4calls inside the renew thread */
	crhold(cr);
	np->s_cred = cr;
	cv_init(&np->cv_thread_exit, NULL, CV_DEFAULT, NULL);
	mutex_init(&np->s_lock, NULL, MUTEX_DEFAULT, NULL);
	nfs_rw_init(&np->s_recovlock, NULL, RW_DEFAULT, NULL);
	list_create(&np->s_deleg_list, sizeof (rnode4_t),
	    offsetof(rnode4_t, r_deleg_link));
	np->s_thread_exit = 0;
	np->state_ref_count = 0;
	np->lease_valid = NFS4_LEASE_NOT_STARTED;
	cv_init(&np->s_cv_otw_count, NULL, CV_DEFAULT, NULL);
	cv_init(&np->s_clientid_pend, NULL, CV_DEFAULT, NULL);
	np->s_otw_call_count = 0;
	cv_init(&np->wait_cb_null, NULL, CV_DEFAULT, NULL);
	np->zoneid = getzoneid();
	np->zone_globals = nfs4_get_callback_globals();
	ASSERT(np->zone_globals != NULL);
	return (np);
}
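
/*
 * Illustrative layout of the nfs_client_id4 built above (taken directly
 * from the assignments in new_nfs4_server(), shown only for reference):
 *
 *	verifier: the gethrestime() seconds and nanoseconds packed into
 *	    the 64-bit verifier4 via the union above.
 *	id:       "Solaris: <uts_nodename()>, NFSv4 kernel client +referrals"
 *	    followed (after the NUL) by the raw server transport address
 *	    bytes, so multi-homed servers see a distinct client ID per
 *	    server address.
 */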

/*
 * Create a new nfs4_server_t structure and add it to the list.
 * Returns new node locked; reference must eventually be freed.
 */
static struct nfs4_server *
add_new_nfs4_server(struct servinfo4 *svp, cred_t *cr)
{
	nfs4_server_t *sp;

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	sp = new_nfs4_server(svp, cr);
	mutex_enter(&sp->s_lock);
	insque(sp, &nfs4_server_lst);
	sp->s_refcnt++;		/* list gets a reference */
	sp->s_flags |= N4S_INSERTED;
	sp->clientid = 0;
	return (sp);
}
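
/*
 * Reference accounting note (summarizing the code above): the node comes
 * back from new_nfs4_server() with s_refcnt == 1 for the caller; the
 * increment here accounts for the nfs4_server_lst linkage, so the caller
 * ends up holding a locked node with s_refcnt == 2 and must eventually
 * drop its own reference via nfs4_server_rele().
 */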

int nfs4_server_t_debug = 0;

#ifdef lint
extern void
dumpnfs4slist(char *, mntinfo4_t *, clientid4, servinfo4_t *);
#endif

#ifndef lint
#ifdef DEBUG
void
dumpnfs4slist(char *txt, mntinfo4_t *mi, clientid4 clientid, servinfo4_t *srv_p)
{
	int hash16(void *p, int len);
	nfs4_server_t *np;

	NFS4_DEBUG(nfs4_server_t_debug, (CE_NOTE,
	    "dumping nfs4_server_t list in %s", txt));
	NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
	    "mi 0x%p, want clientid %llx, addr %d/%04X",
	    mi, (longlong_t)clientid, srv_p->sv_addr.len,
	    hash16((void *)srv_p->sv_addr.buf, srv_p->sv_addr.len)));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst;
	    np = np->forw) {
		NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
		    "node 0x%p, clientid %llx, addr %d/%04X, cnt %d",
		    np, (longlong_t)np->clientid, np->saddr.len,
		    hash16((void *)np->saddr.buf, np->saddr.len),
		    np->state_ref_count));
		if (np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - address matches"));
		if (np->clientid == clientid || np->clientid == 0)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - clientid matches"));
		if (np->s_thread_exit != NFS4_THREAD_EXIT)
			NFS4_DEBUG(nfs4_server_t_debug, (CE_CONT,
			    " - thread not exiting"));
	}
	delay(hz);
}
#endif
#endif

/*
 * Move a mntinfo4_t from one server list to another.
 * Locking of the two nfs4_server_t nodes will be done in list order.
 *
 * Returns NULL if the current nfs4_server_t for the filesystem could not
 * be found (e.g., due to forced unmount).  Otherwise returns a reference
 * to the new nfs4_server_t, which must eventually be freed.
 */
nfs4_server_t *
nfs4_move_mi(mntinfo4_t *mi, servinfo4_t *old, servinfo4_t *new)
{
	nfs4_server_t *p, *op = NULL, *np = NULL;
	int num_open;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(nfs_zone() == mi->mi_zone);

	mutex_enter(&nfs4_server_lst_lock);
#ifdef DEBUG
	if (nfs4_server_t_debug)
		dumpnfs4slist("nfs4_move_mi", mi, (clientid4)0, new);
#endif
	for (p = nfs4_server_lst.forw; p != &nfs4_server_lst; p = p->forw) {
		if (p->zoneid != zoneid)
			continue;
		if (p->saddr.len == old->sv_addr.len &&
		    bcmp(p->saddr.buf, old->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			op = p;
			mutex_enter(&op->s_lock);
			op->s_refcnt++;
		}
		if (p->saddr.len == new->sv_addr.len &&
		    bcmp(p->saddr.buf, new->sv_addr.buf, p->saddr.len) == 0 &&
		    p->s_thread_exit != NFS4_THREAD_EXIT) {
			np = p;
			mutex_enter(&np->s_lock);
		}
		if (op != NULL && np != NULL)
			break;
	}
	if (op == NULL) {
		/*
		 * Filesystem has been forcibly unmounted.  Bail out.
		 */
		if (np != NULL)
			mutex_exit(&np->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return (NULL);
	}
	if (np != NULL) {
		np->s_refcnt++;
	} else {
#ifdef DEBUG
		NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
		    "nfs4_move_mi: no target nfs4_server, will create."));
#endif
		np = add_new_nfs4_server(new, kcred);
	}
	mutex_exit(&nfs4_server_lst_lock);

	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: for mi 0x%p, "
	    "old servinfo4 0x%p, new servinfo4 0x%p, "
	    "old nfs4_server 0x%p, new nfs4_server 0x%p, ",
	    (void *)mi, (void *)old, (void *)new,
	    (void *)op, (void *)np));
	ASSERT(op != NULL && np != NULL);

	/* discard any delegations */
	nfs4_deleg_discard(mi, op);

	num_open = mi->mi_open_files;
	mi->mi_open_files = 0;
	op->state_ref_count -= num_open;
	ASSERT(op->state_ref_count >= 0);
	np->state_ref_count += num_open;
	nfs4_remove_mi_from_server_nolock(mi, op);
	mi->mi_open_files = num_open;
	NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
	    "nfs4_move_mi: mi_open_files %d, op->cnt %d, np->cnt %d",
	    mi->mi_open_files, op->state_ref_count, np->state_ref_count));

	nfs4_add_mi_to_server(np, mi);

	mutex_exit(&op->s_lock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(op);

	return (np);
}
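
/*
 * Note (restating the code above): mi_open_files is temporarily zeroed
 * before nfs4_remove_mi_from_server_nolock() so that the removal is not
 * deferred via MI4_REMOVE_ON_LAST_CLOSE, and is restored right after;
 * the corresponding open-state counts are moved from op->state_ref_count
 * to np->state_ref_count so both servers' counts stay consistent.
 */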

/*
 * The caller must hold nfs4_server_lst_lock.
 * Search the nfs4_server list to find a match on this servinfo4
 * based on its address.
 *
 * Returns NULL if no match is found.  Otherwise returns a reference (which
 * must eventually be freed) to a locked nfs4_server.
 */
nfs4_server_t *
servinfo4_to_nfs4_server(servinfo4_t *srv_p)
{
	nfs4_server_t *np;
	zoneid_t zoneid = nfs_zoneid();

	ASSERT(MUTEX_HELD(&nfs4_server_lst_lock));
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (np->zoneid == zoneid &&
		    np->saddr.len == srv_p->sv_addr.len &&
		    bcmp(np->saddr.buf, srv_p->sv_addr.buf,
		    np->saddr.len) == 0 &&
		    np->s_thread_exit != NFS4_THREAD_EXIT) {
			mutex_enter(&np->s_lock);
			np->s_refcnt++;
			return (np);
		}
	}
	return (NULL);
}
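
/*
 * Minimal usage sketch (illustrative only; "svp" and "sp" below are
 * hypothetical caller locals, not identifiers from this file):
 *
 *	mutex_enter(&nfs4_server_lst_lock);
 *	sp = servinfo4_to_nfs4_server(svp);
 *	mutex_exit(&nfs4_server_lst_lock);
 *	if (sp != NULL) {
 *		... use sp with sp->s_lock held ...
 *		mutex_exit(&sp->s_lock);
 *		nfs4_server_rele(sp);
 *	}
 */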

/*
 * Locks the nfs4_server down if it is found and returns a reference that
 * must eventually be freed.
 */
static nfs4_server_t *
lookup_nfs4_server(nfs4_server_t *sp, int any_state)
{
	nfs4_server_t *np;

	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		mutex_enter(&np->s_lock);
		if (np == sp && np->s_refcnt > 0 &&
		    (np->s_thread_exit != NFS4_THREAD_EXIT || any_state)) {
			mutex_exit(&nfs4_server_lst_lock);
			np->s_refcnt++;
			return (np);
		}
		mutex_exit(&np->s_lock);
	}
	mutex_exit(&nfs4_server_lst_lock);

	return (NULL);
}

/*
 * The caller should be holding mi->mi_recovlock, and it should continue to
 * hold the lock until done with the returned nfs4_server_t.  Once
 * mi->mi_recovlock is released, there is no guarantee that the returned
 * nfs4_server_t will continue to correspond to mi.
 */
nfs4_server_t *
find_nfs4_server(mntinfo4_t *mi)
{
	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	return (lookup_nfs4_server(mi->mi_srv, 0));
}

/*
 * Same as above, but takes an "any_state" parameter which can be
 * set to 1 if the caller wishes to find nfs4_server_t's which
 * have been marked for termination by the exit of the renew
 * thread.  This should only be used by operations which are
 * cleaning up and will not cause an OTW op.
 */
nfs4_server_t *
find_nfs4_server_all(mntinfo4_t *mi, int any_state)
{
	ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) ||
	    nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER));

	return (lookup_nfs4_server(mi->mi_srv, any_state));
}

/*
 * Lock sp, but only if it's still active (in the list and hasn't been
 * flagged as exiting) or 'any_state' is non-zero.
 * Returns TRUE if sp got locked and adds a reference to sp.
 */
bool_t
nfs4_server_vlock(nfs4_server_t *sp, int any_state)
{
	return (lookup_nfs4_server(sp, any_state) != NULL);
}
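
/*
 * On success nfs4_server_vlock() leaves sp->s_lock held and s_refcnt
 * bumped (see lookup_nfs4_server() above), so a caller that gets TRUE
 * back is expected to do both a mutex_exit(&sp->s_lock) and an
 * nfs4_server_rele(sp) when it is done with sp.
 */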

/*
 * Release the reference to sp and destroy it if that's the last one.
 */

void
nfs4_server_rele(nfs4_server_t *sp)
{
	mutex_enter(&sp->s_lock);
	ASSERT(sp->s_refcnt > 0);
	sp->s_refcnt--;
	if (sp->s_refcnt > 0) {
		mutex_exit(&sp->s_lock);
		return;
	}
	mutex_exit(&sp->s_lock);

	mutex_enter(&nfs4_server_lst_lock);
	mutex_enter(&sp->s_lock);
	if (sp->s_refcnt > 0) {
		mutex_exit(&sp->s_lock);
		mutex_exit(&nfs4_server_lst_lock);
		return;
	}
	remque(sp);
	sp->forw = sp->back = NULL;
	mutex_exit(&nfs4_server_lst_lock);
	destroy_nfs4_server(sp);
}
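
/*
 * The drop-and-retake dance above exists because nfs4_server_lst_lock is
 * acquired before s_lock in the lookup paths earlier in this file, so
 * once the count hits zero we release s_lock, take the list lock, retake
 * s_lock in the proper order, and re-check s_refcnt in case another
 * thread gained a reference in the window.
 */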

static void
destroy_nfs4_server(nfs4_server_t *sp)
{
	ASSERT(MUTEX_HELD(&sp->s_lock));
	ASSERT(sp->s_refcnt == 0);
	ASSERT(sp->s_otw_call_count == 0);

	remove_all_mi(sp);

	crfree(sp->s_cred);
	kmem_free(sp->saddr.buf, sp->saddr.maxlen);
	kmem_free(sp->clidtosend.id_val, sp->clidtosend.id_len);
	mutex_exit(&sp->s_lock);

	/* destroy the nfs4_server */
	nfs4callback_destroy(sp);
	list_destroy(&sp->s_deleg_list);
	mutex_destroy(&sp->s_lock);
	cv_destroy(&sp->cv_thread_exit);
	cv_destroy(&sp->s_cv_otw_count);
	cv_destroy(&sp->s_clientid_pend);
	cv_destroy(&sp->wait_cb_null);
	nfs_rw_destroy(&sp->s_recovlock);
	kmem_free(sp, sizeof (*sp));
}

/*
 * Fork off a thread to free the data structures for a mount.
 */

static void
async_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
{
	freemountargs_t *args;
	args = kmem_alloc(sizeof (freemountargs_t), KM_SLEEP);
	args->fm_vfsp = vfsp;
	VFS_HOLD(vfsp);
	MI4_HOLD(VFTOMI4(vfsp));
	args->fm_flag = flag;
	args->fm_cr = cr;
	crhold(cr);
	(void) zthread_create(NULL, 0, nfs4_free_mount_thread, args, 0,
	    minclsyspri);
}
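
/*
 * The VFS_HOLD, MI4_HOLD and crhold taken in async_free_mount() are the
 * references that keep the vfs, mntinfo4 and cred alive until
 * nfs4_free_mount_thread() below drops them with VFS_RELE, MI4_RELE and
 * crfree once the actual teardown is done.
 */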

static void
nfs4_free_mount_thread(freemountargs_t *args)
{
	mntinfo4_t *mi;
	nfs4_free_mount(args->fm_vfsp, args->fm_flag, args->fm_cr);
	mi = VFTOMI4(args->fm_vfsp);
	crfree(args->fm_cr);
	VFS_RELE(args->fm_vfsp);
	MI4_RELE(mi);
	kmem_free(args, sizeof (freemountargs_t));
	zthread_exit();
	/* NOTREACHED */
}

/*
 * Thread to free the data structures for a given filesystem.
 */
static void
nfs4_free_mount(vfs_t *vfsp, int flag, cred_t *cr)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);
	nfs4_server_t *sp;
	callb_cpr_t cpr_info;
	kmutex_t cpr_lock;
	boolean_t async_thread;
	int removed;

	bool_t must_unlock;
	nfs4_ephemeral_tree_t *eph_tree;

	/*
	 * We need to participate in the CPR framework if this is a kernel
	 * thread.
	 */
	async_thread = (curproc == nfs_zone()->zone_zsched);
	if (async_thread) {
		mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
		CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
		    "nfsv4AsyncUnmount");
	}

	/*
	 * We need to wait for all outstanding OTW calls
	 * and recovery to finish before we remove the mi
	 * from the nfs4_server_t, as current pending
	 * calls might still need this linkage (in order
	 * to find a nfs4_server_t from a mntinfo4_t).
	 */
	(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
	sp = find_nfs4_server(mi);
	nfs_rw_exit(&mi->mi_recovlock);

	if (sp) {
		while (sp->s_otw_call_count != 0) {
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_BEGIN(&cpr_info);
				mutex_exit(&cpr_lock);
			}
			cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
			if (async_thread) {
				mutex_enter(&cpr_lock);
				CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
				mutex_exit(&cpr_lock);
			}
		}
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		sp = NULL;
	}

	mutex_enter(&mi->mi_lock);
	while (mi->mi_in_recovery != 0) {
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_BEGIN(&cpr_info);
			mutex_exit(&cpr_lock);
		}
		cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
		if (async_thread) {
			mutex_enter(&cpr_lock);
			CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
			mutex_exit(&cpr_lock);
		}
	}
	mutex_exit(&mi->mi_lock);

	/*
	 * If we got an error, then do not nuke the
	 * tree.  Either the harvester is busy reclaiming
	 * this node or we ran into some busy condition.
	 *
	 * The harvester will eventually come along and clean up.
	 * The only problem would be the root mount point.
	 *
	 * Since the busy node can occur for a variety
	 * of reasons and can result in an entry staying
	 * in df output but no longer accessible from the
	 * directory tree, we are okay.
	 */
	if (!nfs4_ephemeral_umount(mi, flag, cr,
	    &must_unlock, &eph_tree))
		nfs4_ephemeral_umount_activate(mi, &must_unlock,
		    &eph_tree);

	/*
	 * The original purge of the dnlc via 'dounmount'
	 * doesn't guarantee that another dnlc entry was not
	 * added while we waited for all outstanding OTW
	 * and recovery calls to finish.  So re-purge the
	 * dnlc now.
	 */
	(void) dnlc_purge_vfsp(vfsp, 0);

	/*
	 * We need to explicitly stop the manager thread; the async worker
	 * threads can time out and exit on their own.
	 */
	mutex_enter(&mi->mi_async_lock);
	mi->mi_max_threads = 0;
	NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
	mutex_exit(&mi->mi_async_lock);
	if (mi->mi_manager_thread)
		nfs4_async_manager_stop(vfsp);

	destroy_rtable4(vfsp, cr);

	nfs4_remove_mi_from_server(mi, NULL);

	if (async_thread) {
		mutex_enter(&cpr_lock);
		CALLB_CPR_EXIT(&cpr_info);	/* drops cpr_lock */
		mutex_destroy(&cpr_lock);
	}

	removed = nfs4_mi_zonelist_remove(mi);
	if (removed)
		zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
}

/* Referral related sub-routines */

/* Free up the contents of a knetconfig */
static void
free_knconf_contents(struct knetconfig *k)
{
	if (k == NULL)
		return;
	if (k->knc_protofmly)
		kmem_free(k->knc_protofmly, KNC_STRSIZE);
	if (k->knc_proto)
		kmem_free(k->knc_proto, KNC_STRSIZE);
}

/*
 * Return, in a newly allocated buffer, the name component from the
 * path which gave us the NFS4ERR_MOVED error.
 * If the path is /rp/aaa/bbb and nth is 1, "aaa" is returned.
 */
static char *
extract_referral_point(const char *svp, int nth)
{
	int num_slashes = 0;
	const char *p;
	char *newpath = NULL;
	int i = 0;

	newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	for (p = svp; *p; p++) {
		if (*p == '/')
			num_slashes++;
		if (num_slashes == nth + 1) {
			p++;
			while (*p != '/') {
				if (*p == '\0')
					break;
				newpath[i] = *p;
				i++;
				p++;
			}
			newpath[i++] = '\0';
			break;
		}
	}
	return (newpath);
}
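
/*
 * Note for callers (restating the allocation above, not new behavior):
 * the returned string always lives in a MAXPATHLEN-sized buffer obtained
 * with kmem_zalloc(), so it is presumably the caller's responsibility to
 * release it with kmem_free(newpath, MAXPATHLEN) when done.
 */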

/*
 * This sets up a new path in sv_path to do a lookup of the referral point.
 * If the path is /rp/aaa/bbb and the referral point is aaa,
 * this updates sv_path to /rp/aaa.  That path will then be used to get
 * the referral location.
 */
static void
setup_newsvpath(servinfo4_t *svp, int nth)
{
	int num_slashes = 0, pathlen, i = 0;
	char *newpath, *p;

	newpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	for (p = svp->sv_path; *p; p++) {
		newpath[i] = *p;
		if (*p == '/')
			num_slashes++;
		if (num_slashes == nth + 1) {
			newpath[i] = '\0';
			pathlen = strlen(newpath) + 1;
			kmem_free(svp->sv_path, svp->sv_pathlen);
			svp->sv_path = kmem_alloc(pathlen, KM_SLEEP);
			svp->sv_pathlen = pathlen;
			bcopy(newpath, svp->sv_path, pathlen);
			break;
		}
		i++;
	}
	kmem_free(newpath, MAXPATHLEN);
}