1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * utility routines for the /dev fs
27 */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/t_lock.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/user.h>
35 #include <sys/time.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/file.h>
39 #include <sys/fcntl.h>
40 #include <sys/flock.h>
41 #include <sys/kmem.h>
42 #include <sys/uio.h>
43 #include <sys/errno.h>
44 #include <sys/stat.h>
45 #include <sys/cred.h>
46 #include <sys/dirent.h>
47 #include <sys/pathname.h>
48 #include <sys/cmn_err.h>
49 #include <sys/debug.h>
50 #include <sys/mode.h>
51 #include <sys/policy.h>
52 #include <fs/fs_subr.h>
53 #include <sys/mount.h>
54 #include <sys/fs/snode.h>
55 #include <sys/fs/dv_node.h>
56 #include <sys/fs/sdev_impl.h>
57 #include <sys/sunndi.h>
58 #include <sys/sunmdi.h>
59 #include <sys/conf.h>
60 #include <sys/proc.h>
61 #include <sys/user.h>
62 #include <sys/modctl.h>
63
/* debug tunables, compiled in only for DEBUG kernels */
#ifdef DEBUG
int sdev_debug = 0x00000001;	/* enables sdcmn_err() diagnostic output */
int sdev_debug_cache_flags = 0;	/* extra flags passed to kmem_cache_create() */
#endif
68
69 /*
70 * globals
71 */
/* prototype memory vattrs */

/* default attributes for a directory (VDIR) sdev_node */
vattr_t sdev_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	SDEV_DIRMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};

/* default attributes for a symlink (VLNK) sdev_node */
vattr_t sdev_vattr_lnk = {
	AT_TYPE|AT_MODE,			/* va_mask */
	VLNK,					/* va_type */
	SDEV_LNKMODE_DEFAULT,			/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};

/* default attributes for a block device (VBLK) sdev_node */
vattr_t sdev_vattr_blk = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VBLK,					/* va_type */
	S_IFBLK | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};

/* default attributes for a character device (VCHR) sdev_node */
vattr_t sdev_vattr_chr = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VCHR,					/* va_type */
	S_IFCHR | SDEV_DEVMODE_DEFAULT,		/* va_mode */
	SDEV_UID_DEFAULT,			/* va_uid */
	SDEV_GID_DEFAULT,			/* va_gid */
	0,					/* va_fsid */
	0,					/* va_nodeid */
	0,					/* va_nlink */
	0,					/* va_size */
	0,					/* va_atime */
	0,					/* va_mtime */
	0,					/* va_ctime */
	0,					/* va_rdev */
	0,					/* va_blksize */
	0,					/* va_nblocks */
	0					/* va_vcode */
};
148
kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

/* forward declarations for file-local (static) helpers */
static struct vnodeops *sdev_get_vop(struct sdev_node *);
static void sdev_set_no_negcache(struct sdev_node *);
static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
static void sdev_free_vtab(fs_operation_def_t *);
157
158 static void
sdev_prof_free(struct sdev_node * dv)159 sdev_prof_free(struct sdev_node *dv)
160 {
161 ASSERT(!SDEV_IS_GLOBAL(dv));
162 if (dv->sdev_prof.dev_name)
163 nvlist_free(dv->sdev_prof.dev_name);
164 if (dv->sdev_prof.dev_map)
165 nvlist_free(dv->sdev_prof.dev_map);
166 if (dv->sdev_prof.dev_symlink)
167 nvlist_free(dv->sdev_prof.dev_symlink);
168 if (dv->sdev_prof.dev_glob_incdir)
169 nvlist_free(dv->sdev_prof.dev_glob_incdir);
170 if (dv->sdev_prof.dev_glob_excdir)
171 nvlist_free(dv->sdev_prof.dev_glob_excdir);
172 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
173 }
174
175 /* sdev_node cache constructor */
176 /*ARGSUSED1*/
177 static int
i_sdev_node_ctor(void * buf,void * cfarg,int flag)178 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
179 {
180 struct sdev_node *dv = (struct sdev_node *)buf;
181 struct vnode *vp;
182
183 bzero(buf, sizeof (struct sdev_node));
184 vp = dv->sdev_vnode = vn_alloc(flag);
185 if (vp == NULL) {
186 return (-1);
187 }
188 vp->v_data = dv;
189 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
190 return (0);
191 }
192
193 /* sdev_node cache destructor */
194 /*ARGSUSED1*/
195 static void
i_sdev_node_dtor(void * buf,void * arg)196 i_sdev_node_dtor(void *buf, void *arg)
197 {
198 struct sdev_node *dv = (struct sdev_node *)buf;
199 struct vnode *vp = SDEVTOV(dv);
200
201 rw_destroy(&dv->sdev_contents);
202 vn_free(vp);
203 }
204
205 /* initialize sdev_node cache */
206 void
sdev_node_cache_init()207 sdev_node_cache_init()
208 {
209 int flags = 0;
210
211 #ifdef DEBUG
212 flags = sdev_debug_cache_flags;
213 if (flags)
214 sdcmn_err(("cache debug flags 0x%x\n", flags));
215 #endif /* DEBUG */
216
217 ASSERT(sdev_node_cache == NULL);
218 sdev_node_cache = kmem_cache_create("sdev_node_cache",
219 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
220 NULL, NULL, NULL, flags);
221 }
222
223 /* destroy sdev_node cache */
224 void
sdev_node_cache_fini()225 sdev_node_cache_fini()
226 {
227 ASSERT(sdev_node_cache != NULL);
228 kmem_cache_destroy(sdev_node_cache);
229 sdev_node_cache = NULL;
230 }
231
232 /*
233 * Compare two nodes lexographically to balance avl tree
234 */
235 static int
sdev_compare_nodes(const struct sdev_node * dv1,const struct sdev_node * dv2)236 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
237 {
238 int rv;
239 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
240 return (0);
241 return ((rv < 0) ? -1 : 1);
242 }
243
/*
 * Set the life-cycle state (e.g. SDEV_INIT, SDEV_READY, SDEV_ZOMBIE)
 * of a sdev_node.  The caller must hold dv->sdev_contents as writer.
 */
void
sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
{
	ASSERT(dv);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
	dv->sdev_state = state;
}
251
/*
 * Merge the attributes selected by vap->va_mask into the node's
 * cached vattr (dv->sdev_attr, which must already be allocated).
 * Timestamps not supplied by the caller default to the current time.
 */
static void
sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
{
	timestruc_t now;
	struct vattr *attrp;
	uint_t mask;

	ASSERT(dv->sdev_attr);
	ASSERT(vap);

	attrp = dv->sdev_attr;
	mask = vap->va_mask;
	if (mask & AT_TYPE)
		attrp->va_type = vap->va_type;
	if (mask & AT_MODE)
		attrp->va_mode = vap->va_mode;
	if (mask & AT_UID)
		attrp->va_uid = vap->va_uid;
	if (mask & AT_GID)
		attrp->va_gid = vap->va_gid;
	if (mask & AT_RDEV)
		attrp->va_rdev = vap->va_rdev;

	/* absent time attributes are stamped with "now" */
	gethrestime(&now);
	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
}
280
/*
 * Allocate the node's cached vattr and populate it from vap.
 * vap must carry at least AT_TYPE and AT_MODE; the node must not
 * already have a cached vattr.
 */
static void
sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
{
	ASSERT(dv->sdev_attr == NULL);
	ASSERT(vap->va_mask & AT_TYPE);
	ASSERT(vap->va_mask & AT_MODE);

	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
	sdev_attr_update(dv, vap);
}
291
/*
 * alloc and initialize a sdev_node
 *
 * Allocates a new child node of directory ddv named nm and leaves it
 * in SDEV_INIT state.  The node inherits the parent's vnodeops and
 * flags; the caller later promotes it via sdev_nodeready().
 *
 *	ddv	parent directory node
 *	nm	child name (must be shorter than MAXNAMELEN)
 *	newdv	out: new node on success, NULL on error
 *	vap	optional prototype attributes (AT_TYPE|AT_MODE required
 *		when non-NULL)
 *
 * Returns 0 or ENAMETOOLONG.
 */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t *dhl;

	nmlen = strlen(nm) + 1;	/* count the terminating '\0' */
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	/* full path is "<parent path>/<nm>" */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;

	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops.
	 * maybe overwriten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;		/* set at sdev_direnter() time */
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attr_alloc(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		dv->sdev_flags |= SDEV_GLOBAL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_args = NULL;
		sdev_set_no_negcache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL; /* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}
370
371 /*
372 * transition a sdev_node into SDEV_READY state
373 */
374 int
sdev_nodeready(struct sdev_node * dv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred)375 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
376 void *args, struct cred *cred)
377 {
378 int error = 0;
379 struct vnode *vp = SDEVTOV(dv);
380 vtype_t type;
381
382 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
383
384 type = vap->va_type;
385 vp->v_type = type;
386 vp->v_rdev = vap->va_rdev;
387 rw_enter(&dv->sdev_contents, RW_WRITER);
388 if (type == VDIR) {
389 dv->sdev_nlink = 2;
390 dv->sdev_flags &= ~SDEV_PERSIST;
391 dv->sdev_flags &= ~SDEV_DYNAMIC;
392 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
393 ASSERT(dv->sdev_dotdot);
394 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
395 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
396 avl_create(&dv->sdev_entries,
397 (int (*)(const void *, const void *))sdev_compare_nodes,
398 sizeof (struct sdev_node),
399 offsetof(struct sdev_node, sdev_avllink));
400 } else if (type == VLNK) {
401 ASSERT(args);
402 dv->sdev_nlink = 1;
403 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
404 } else {
405 dv->sdev_nlink = 1;
406 }
407
408 if (!(SDEV_IS_GLOBAL(dv))) {
409 dv->sdev_origin = (struct sdev_node *)args;
410 dv->sdev_flags &= ~SDEV_PERSIST;
411 }
412
413 /*
414 * shadow node is created here OR
415 * if failed (indicated by dv->sdev_attrvp == NULL),
416 * created later in sdev_setattr
417 */
418 if (avp) {
419 dv->sdev_attrvp = avp;
420 } else {
421 if (dv->sdev_attr == NULL) {
422 sdev_attr_alloc(dv, vap);
423 } else {
424 sdev_attr_update(dv, vap);
425 }
426
427 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
428 error = sdev_shadow_node(dv, cred);
429 }
430
431 if (error == 0) {
432 /* transition to READY state */
433 sdev_set_nodestate(dv, SDEV_READY);
434 sdev_nc_node_exists(dv);
435 } else {
436 sdev_set_nodestate(dv, SDEV_ZOMBIE);
437 }
438 rw_exit(&dv->sdev_contents);
439 return (error);
440 }
441
442 /*
443 * setting ZOMBIE state
444 */
445 static int
sdev_nodezombied(struct sdev_node * dv)446 sdev_nodezombied(struct sdev_node *dv)
447 {
448 rw_enter(&dv->sdev_contents, RW_WRITER);
449 sdev_set_nodestate(dv, SDEV_ZOMBIE);
450 rw_exit(&dv->sdev_contents);
451 return (0);
452 }
453
454 /*
455 * Build the VROOT sdev_node.
456 */
457 /*ARGSUSED*/
458 struct sdev_node *
sdev_mkroot(struct vfs * vfsp,dev_t devdev,struct vnode * mvp,struct vnode * avp,struct cred * cred)459 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
460 struct vnode *avp, struct cred *cred)
461 {
462 struct sdev_node *dv;
463 struct vnode *vp;
464 char devdir[] = "/dev";
465
466 ASSERT(sdev_node_cache != NULL);
467 ASSERT(avp);
468 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
469 vp = SDEVTOV(dv);
470 vn_reinit(vp);
471 vp->v_flag |= VROOT;
472 vp->v_vfsp = vfsp;
473 vp->v_type = VDIR;
474 vp->v_rdev = devdev;
475 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
476 vn_exists(vp);
477
478 if (vfsp->vfs_mntpt)
479 dv->sdev_name = i_ddi_strdup(
480 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
481 else
482 /* vfs_mountdev1 set mount point later */
483 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
484 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
485 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
486 dv->sdev_ino = SDEV_ROOTINO;
487 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */
488 dv->sdev_dotdot = dv; /* .. == self */
489 dv->sdev_attrvp = avp;
490 dv->sdev_attr = NULL;
491 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
492 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
493 if (strcmp(dv->sdev_name, "/dev") == 0) {
494 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
495 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
496 dv->sdev_gdir_gen = 0;
497 } else {
498 dv->sdev_flags = SDEV_BUILD;
499 dv->sdev_flags &= ~SDEV_PERSIST;
500 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
501 dv->sdev_ldir_gen = 0;
502 dv->sdev_devtree_gen = 0;
503 }
504
505 avl_create(&dv->sdev_entries,
506 (int (*)(const void *, const void *))sdev_compare_nodes,
507 sizeof (struct sdev_node),
508 offsetof(struct sdev_node, sdev_avllink));
509
510 rw_enter(&dv->sdev_contents, RW_WRITER);
511 sdev_set_nodestate(dv, SDEV_READY);
512 rw_exit(&dv->sdev_contents);
513 sdev_nc_node_exists(dv);
514 return (dv);
515 }
516
/* directory dependent vop table */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;
};

/*
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
 *
 * The table is terminated by the all-NULL sentinel entry.
 */
static struct sdev_vop_table vtab[] =
{
	{ "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
	devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },

	{ "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },

	{ "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
	SDEV_DYNAMIC | SDEV_VTOR },

	{ "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
	devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },

	/*
	 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
	 * lofi driver controls child nodes.
	 *
	 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
	 * stale nodes (e.g. from devfsadm -R).
	 *
	 * In addition, devfsadm knows not to attempt a rmdir: a zone
	 * may hold a reference, which would zombify the node,
	 * preventing a mkdir.
	 */

	{ "lofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
	{ "rlofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },

	{ NULL, NULL, NULL, NULL, NULL, 0}
};
569
570 struct sdev_vop_table *
sdev_match(struct sdev_node * dv)571 sdev_match(struct sdev_node *dv)
572 {
573 int vlen;
574 int i;
575
576 for (i = 0; vtab[i].vt_name; i++) {
577 if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
578 return (&vtab[i]);
579 if (vtab[i].vt_flags & SDEV_SUBDIR) {
580 char *ptr;
581
582 ASSERT(strlen(dv->sdev_path) > 5);
583 ptr = dv->sdev_path + 5;
584 vlen = strlen(vtab[i].vt_name);
585 if ((strncmp(vtab[i].vt_name, ptr,
586 vlen - 1) == 0) && ptr[vlen] == '/')
587 return (&vtab[i]);
588 }
589
590 }
591 return (NULL);
592 }
593
594 /*
595 * sets a directory's vnodeops if the directory is in the vtab;
596 */
597 static struct vnodeops *
sdev_get_vop(struct sdev_node * dv)598 sdev_get_vop(struct sdev_node *dv)
599 {
600 struct sdev_vop_table *vtp;
601 char *path;
602
603 path = dv->sdev_path;
604 ASSERT(path);
605
606 /* gets the relative path to /dev/ */
607 path += 5;
608
609 /* gets the vtab entry it matches */
610 if ((vtp = sdev_match(dv)) != NULL) {
611 dv->sdev_flags |= vtp->vt_flags;
612
613 if (vtp->vt_vops) {
614 if (vtp->vt_global_vops)
615 *(vtp->vt_global_vops) = vtp->vt_vops;
616 return (vtp->vt_vops);
617 }
618
619 if (vtp->vt_service) {
620 fs_operation_def_t *templ;
621 templ = sdev_merge_vtab(vtp->vt_service);
622 if (vn_make_ops(vtp->vt_name,
623 (const fs_operation_def_t *)templ,
624 &vtp->vt_vops) != 0) {
625 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
626 vtp->vt_name);
627 /*NOTREACHED*/
628 }
629 if (vtp->vt_global_vops) {
630 *(vtp->vt_global_vops) = vtp->vt_vops;
631 }
632 sdev_free_vtab(templ);
633 return (vtp->vt_vops);
634 }
635 return (sdev_vnodeops);
636 }
637
638 /* child inherits the persistence of the parent */
639 if (SDEV_IS_PERSIST(dv->sdev_dotdot))
640 dv->sdev_flags |= SDEV_PERSIST;
641
642 return (sdev_vnodeops);
643 }
644
645 static void
sdev_set_no_negcache(struct sdev_node * dv)646 sdev_set_no_negcache(struct sdev_node *dv)
647 {
648 int i;
649 char *path;
650
651 ASSERT(dv->sdev_path);
652 path = dv->sdev_path + strlen("/dev/");
653
654 for (i = 0; vtab[i].vt_name; i++) {
655 if (strcmp(vtab[i].vt_name, path) == 0) {
656 if (vtab[i].vt_flags & SDEV_NO_NCACHE)
657 dv->sdev_flags |= SDEV_NO_NCACHE;
658 break;
659 }
660 }
661 }
662
663 void *
sdev_get_vtor(struct sdev_node * dv)664 sdev_get_vtor(struct sdev_node *dv)
665 {
666 struct sdev_vop_table *vtp;
667
668 vtp = sdev_match(dv);
669 if (vtp)
670 return ((void *)vtp->vt_vtor);
671 else
672 return (NULL);
673 }
674
675 /*
676 * Build the base root inode
677 */
678 ino_t
sdev_mkino(struct sdev_node * dv)679 sdev_mkino(struct sdev_node *dv)
680 {
681 ino_t ino;
682
683 /*
684 * for now, follow the lead of tmpfs here
685 * need to someday understand the requirements here
686 */
687 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
688 ino += SDEV_ROOTINO + 1;
689
690 return (ino);
691 }
692
/*
 * Read the target of the symlink behind backing vnode linkvp via
 * VOP_READLINK and return it in *link as an i_ddi_strdup'ed string
 * (caller frees).  Returns 0 or ENOENT on any failure.
 */
int
sdev_getlink(struct vnode *linkvp, char **link)
{
	int err;
	char *buf;
	struct uio uio = {0};
	struct iovec iov = {0};

	if (linkvp == NULL)
		return (ENOENT);
	ASSERT(linkvp->v_type == VLNK);

	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	iov.iov_base = buf;
	iov.iov_len = MAXPATHLEN;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = MAXPATHLEN;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_llimit = MAXOFFSET_T;

	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
	if (err) {
		/*
		 * NOTE(review): buf may still be empty (it was zeroed)
		 * when READLINK fails early, so this message can print
		 * an empty name.
		 */
		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
		kmem_free(buf, MAXPATHLEN);
		return (ENOENT);
	}

	/* mission complete */
	*link = i_ddi_strdup(buf, KM_SLEEP);
	kmem_free(buf, MAXPATHLEN);
	return (0);
}
726
727 /*
728 * A convenient wrapper to get the devfs node vnode for a device
729 * minor functionality: readlink() of a /dev symlink
730 * Place the link into dv->sdev_symlink
731 */
732 static int
sdev_follow_link(struct sdev_node * dv)733 sdev_follow_link(struct sdev_node *dv)
734 {
735 int err;
736 struct vnode *linkvp;
737 char *link = NULL;
738
739 linkvp = SDEVTOV(dv);
740 if (linkvp == NULL)
741 return (ENOENT);
742 ASSERT(linkvp->v_type == VLNK);
743 err = sdev_getlink(linkvp, &link);
744 if (err) {
745 (void) sdev_nodezombied(dv);
746 dv->sdev_symlink = NULL;
747 return (ENOENT);
748 }
749
750 ASSERT(link != NULL);
751 dv->sdev_symlink = link;
752 return (0);
753 }
754
755 static int
sdev_node_check(struct sdev_node * dv,struct vattr * nvap,void * nargs)756 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
757 {
758 vtype_t otype = SDEVTOV(dv)->v_type;
759
760 /*
761 * existing sdev_node has a different type.
762 */
763 if (otype != nvap->va_type) {
764 sdcmn_err9(("sdev_node_check: existing node "
765 " %s type %d does not match new node type %d\n",
766 dv->sdev_name, otype, nvap->va_type));
767 return (EEXIST);
768 }
769
770 /*
771 * For a symlink, the target should be the same.
772 */
773 if (otype == VLNK) {
774 ASSERT(nargs != NULL);
775 ASSERT(dv->sdev_symlink != NULL);
776 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
777 sdcmn_err9(("sdev_node_check: existing node "
778 " %s has different symlink %s as new node "
779 " %s\n", dv->sdev_name, dv->sdev_symlink,
780 (char *)nargs));
781 return (EEXIST);
782 }
783 }
784
785 return (0);
786 }
787
788 /*
789 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
790 *
791 * arguments:
792 * - ddv (parent)
793 * - nm (child name)
794 * - newdv (sdev_node for nm is returned here)
795 * - vap (vattr for the node to be created, va_type should be set.
796 * - avp (attribute vnode)
797 * the defaults should be used if unknown)
798 * - cred
799 * - args
800 * . tnm (for VLNK)
801 * . global sdev_node (for !SDEV_GLOBAL)
802 * - state: SDEV_INIT, SDEV_READY
803 *
804 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
805 *
806 * NOTE: directory contents writers lock needs to be held before
807 * calling this routine.
808 */
809 int
sdev_mknode(struct sdev_node * ddv,char * nm,struct sdev_node ** newdv,struct vattr * vap,struct vnode * avp,void * args,struct cred * cred,sdev_node_state_t state)810 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
811 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
812 sdev_node_state_t state)
813 {
814 int error = 0;
815 sdev_node_state_t node_state;
816 struct sdev_node *dv = NULL;
817
818 ASSERT(state != SDEV_ZOMBIE);
819 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
820
821 if (*newdv) {
822 dv = *newdv;
823 } else {
824 /* allocate and initialize a sdev_node */
825 if (ddv->sdev_state == SDEV_ZOMBIE) {
826 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
827 ddv->sdev_path));
828 return (ENOENT);
829 }
830
831 error = sdev_nodeinit(ddv, nm, &dv, vap);
832 if (error != 0) {
833 sdcmn_err9(("sdev_mknode: error %d,"
834 " name %s can not be initialized\n",
835 error, nm));
836 return (error);
837 }
838 ASSERT(dv);
839
840 /* insert into the directory cache */
841 error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
842 if (error) {
843 sdcmn_err9(("sdev_mknode: node %s can not"
844 " be added into directory cache\n", nm));
845 return (ENOENT);
846 }
847 }
848
849 ASSERT(dv);
850 node_state = dv->sdev_state;
851 ASSERT(node_state != SDEV_ZOMBIE);
852
853 if (state == SDEV_READY) {
854 switch (node_state) {
855 case SDEV_INIT:
856 error = sdev_nodeready(dv, vap, avp, args, cred);
857 if (error) {
858 sdcmn_err9(("sdev_mknode: node %s can NOT"
859 " be transitioned into READY state, "
860 "error %d\n", nm, error));
861 }
862 break;
863 case SDEV_READY:
864 /*
865 * Do some sanity checking to make sure
866 * the existing sdev_node is what has been
867 * asked for.
868 */
869 error = sdev_node_check(dv, vap, args);
870 break;
871 default:
872 break;
873 }
874 }
875
876 if (!error) {
877 *newdv = dv;
878 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
879 } else {
880 SDEV_SIMPLE_RELE(dv);
881 *newdv = NULL;
882 }
883
884 return (error);
885 }
886
887 /*
888 * convenient wrapper to change vp's ATIME, CTIME and MTIME
889 */
890 void
sdev_update_timestamps(struct vnode * vp,cred_t * cred,uint_t mask)891 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
892 {
893 struct vattr attr;
894 timestruc_t now;
895 int err;
896
897 ASSERT(vp);
898 gethrestime(&now);
899 if (mask & AT_CTIME)
900 attr.va_ctime = now;
901 if (mask & AT_MTIME)
902 attr.va_mtime = now;
903 if (mask & AT_ATIME)
904 attr.va_atime = now;
905
906 attr.va_mask = (mask & AT_TIMES);
907 err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
908 if (err && (err != EROFS)) {
909 sdcmn_err(("update timestamps error %d\n", err));
910 }
911 }
912
913 /*
914 * the backing store vnode is released here
915 */
916 /*ARGSUSED1*/
917 void
sdev_nodedestroy(struct sdev_node * dv,uint_t flags)918 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
919 {
920 /* no references */
921 ASSERT(dv->sdev_nlink == 0);
922
923 if (dv->sdev_attrvp != NULLVP) {
924 VN_RELE(dv->sdev_attrvp);
925 /*
926 * reset the attrvp so that no more
927 * references can be made on this already
928 * vn_rele() vnode
929 */
930 dv->sdev_attrvp = NULLVP;
931 }
932
933 if (dv->sdev_attr != NULL) {
934 kmem_free(dv->sdev_attr, sizeof (struct vattr));
935 dv->sdev_attr = NULL;
936 }
937
938 if (dv->sdev_name != NULL) {
939 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
940 dv->sdev_name = NULL;
941 }
942
943 if (dv->sdev_symlink != NULL) {
944 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
945 dv->sdev_symlink = NULL;
946 }
947
948 if (dv->sdev_path) {
949 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
950 dv->sdev_path = NULL;
951 }
952
953 if (!SDEV_IS_GLOBAL(dv))
954 sdev_prof_free(dv);
955
956 if (SDEVTOV(dv)->v_type == VDIR) {
957 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
958 avl_destroy(&dv->sdev_entries);
959 }
960
961 mutex_destroy(&dv->sdev_lookup_lock);
962 cv_destroy(&dv->sdev_lookup_cv);
963
964 /* return node to initial state as per constructor */
965 (void) memset((void *)&dv->sdev_instance_data, 0,
966 sizeof (dv->sdev_instance_data));
967 vn_invalid(SDEVTOV(dv));
968 kmem_cache_free(sdev_node_cache, dv);
969 }
970
971 /*
972 * DIRECTORY CACHE lookup
973 */
974 struct sdev_node *
sdev_findbyname(struct sdev_node * ddv,char * nm)975 sdev_findbyname(struct sdev_node *ddv, char *nm)
976 {
977 struct sdev_node *dv;
978 struct sdev_node dvtmp;
979 avl_index_t where;
980
981 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
982
983 dvtmp.sdev_name = nm;
984 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
985 if (dv) {
986 ASSERT(dv->sdev_dotdot == ddv);
987 ASSERT(strcmp(dv->sdev_name, nm) == 0);
988 SDEV_HOLD(dv);
989 return (dv);
990 }
991 return (NULL);
992 }
993
994 /*
995 * Inserts a new sdev_node in a parent directory
996 */
997 void
sdev_direnter(struct sdev_node * ddv,struct sdev_node * dv)998 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
999 {
1000 avl_index_t where;
1001
1002 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1003 ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1004 ASSERT(ddv->sdev_nlink >= 2);
1005 ASSERT(dv->sdev_nlink == 0);
1006
1007 dv->sdev_dotdot = ddv;
1008 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1009 avl_insert(&ddv->sdev_entries, dv, where);
1010 ddv->sdev_nlink++;
1011 }
1012
1013 /*
1014 * The following check is needed because while sdev_nodes are linked
1015 * in SDEV_INIT state, they have their link counts incremented only
1016 * in SDEV_READY state.
1017 */
1018 static void
decr_link(struct sdev_node * dv)1019 decr_link(struct sdev_node *dv)
1020 {
1021 if (dv->sdev_state != SDEV_INIT)
1022 dv->sdev_nlink--;
1023 else
1024 ASSERT(dv->sdev_nlink == 0);
1025 }
1026
1027 /*
1028 * Delete an existing dv from directory cache
1029 *
1030 * In the case of a node is still held by non-zero reference count,
1031 * the node is put into ZOMBIE state. Once the reference count
1032 * reaches "0", the node is unlinked and destroyed,
1033 * in sdev_inactive().
1034 */
1035 static int
sdev_dirdelete(struct sdev_node * ddv,struct sdev_node * dv)1036 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1037 {
1038 struct vnode *vp;
1039
1040 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1041
1042 vp = SDEVTOV(dv);
1043 mutex_enter(&vp->v_lock);
1044
1045 /* dv is held still */
1046 if (vp->v_count > 1) {
1047 rw_enter(&dv->sdev_contents, RW_WRITER);
1048 if (dv->sdev_state == SDEV_READY) {
1049 sdcmn_err9((
1050 "sdev_dirdelete: node %s busy with count %d\n",
1051 dv->sdev_name, vp->v_count));
1052 dv->sdev_state = SDEV_ZOMBIE;
1053 }
1054 rw_exit(&dv->sdev_contents);
1055 --vp->v_count;
1056 mutex_exit(&vp->v_lock);
1057 return (EBUSY);
1058 }
1059 ASSERT(vp->v_count == 1);
1060
1061 /* unlink from the memory cache */
1062 ddv->sdev_nlink--; /* .. to above */
1063 if (vp->v_type == VDIR) {
1064 decr_link(dv); /* . to self */
1065 }
1066
1067 avl_remove(&ddv->sdev_entries, dv);
1068 decr_link(dv); /* name, back to zero */
1069 vp->v_count--;
1070 mutex_exit(&vp->v_lock);
1071
1072 /* destroy the node */
1073 sdev_nodedestroy(dv, 0);
1074 return (0);
1075 }
1076
1077 /*
1078 * check if the source is in the path of the target
1079 *
1080 * source and target are different
1081 */
1082 /*ARGSUSED2*/
1083 static int
sdev_checkpath(struct sdev_node * sdv,struct sdev_node * tdv,struct cred * cred)1084 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1085 {
1086 int error = 0;
1087 struct sdev_node *dotdot, *dir;
1088
1089 dotdot = tdv->sdev_dotdot;
1090 ASSERT(dotdot);
1091
1092 /* fs root */
1093 if (dotdot == tdv) {
1094 return (0);
1095 }
1096
1097 for (;;) {
1098 /*
1099 * avoid error cases like
1100 * mv a a/b
1101 * mv a a/b/c
1102 * etc.
1103 */
1104 if (dotdot == sdv) {
1105 error = EINVAL;
1106 break;
1107 }
1108
1109 dir = dotdot;
1110 dotdot = dir->sdev_dotdot;
1111
1112 /* done checking because root is reached */
1113 if (dir == dotdot) {
1114 break;
1115 }
1116 }
1117 return (error);
1118 }
1119
/*
 * Rename odv (an entry in directory oddv) to the name nnm in directory
 * nddv.  On entry *ndvp is the existing destination node, or NULL; on
 * success it points at a freshly created node built from the source's
 * attributes.  For directories, the contents are moved by recursing on
 * each entry.  Both parents' sdev_contents locks are held as writers
 * for the duration (a single lock when oddv == nddv).
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	/* capture the source attributes used to build the new node */
	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we gets here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* a directory cannot be renamed into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			/* cannot replace a directory that is a mount point */
			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			/* drop the target dir from cache and backing store */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			/* persistent targets must also be removed on disk */
			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache
			 * note, in case EBUSY is returned, the ZOMBIE
			 * node is taken care in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	/* update timestamps on the new node and its parent */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	/* drop our hold on *ndvp (presumably from sdev_mknode -- verify) */
	SDEV_RELE(*ndvp);
	return (error);

err_out:
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}
1314
1315 /*
1316 * Merge sdev_node specific information into an attribute structure.
1317 *
1318 * note: sdev_node is not locked here
1319 */
1320 void
sdev_vattr_merge(struct sdev_node * dv,struct vattr * vap)1321 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1322 {
1323 struct vnode *vp = SDEVTOV(dv);
1324
1325 vap->va_nlink = dv->sdev_nlink;
1326 vap->va_nodeid = dv->sdev_ino;
1327 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1328 vap->va_type = vp->v_type;
1329
1330 if (vp->v_type == VDIR) {
1331 vap->va_rdev = 0;
1332 vap->va_fsid = vp->v_rdev;
1333 } else if (vp->v_type == VLNK) {
1334 vap->va_rdev = 0;
1335 vap->va_mode &= ~S_IFMT;
1336 vap->va_mode |= S_IFLNK;
1337 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1338 vap->va_rdev = vp->v_rdev;
1339 vap->va_mode &= ~S_IFMT;
1340 if (vap->va_type == VCHR)
1341 vap->va_mode |= S_IFCHR;
1342 else
1343 vap->va_mode |= S_IFBLK;
1344 } else {
1345 vap->va_rdev = 0;
1346 }
1347 }
1348
1349 struct vattr *
sdev_getdefault_attr(enum vtype type)1350 sdev_getdefault_attr(enum vtype type)
1351 {
1352 if (type == VDIR)
1353 return (&sdev_vattr_dir);
1354 else if (type == VCHR)
1355 return (&sdev_vattr_chr);
1356 else if (type == VBLK)
1357 return (&sdev_vattr_blk);
1358 else if (type == VLNK)
1359 return (&sdev_vattr_lnk);
1360 else
1361 return (NULL);
1362 }
1363 int
sdev_to_vp(struct sdev_node * dv,struct vnode ** vpp)1364 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1365 {
1366 int rv = 0;
1367 struct vnode *vp = SDEVTOV(dv);
1368
1369 switch (vp->v_type) {
1370 case VCHR:
1371 case VBLK:
1372 /*
1373 * If vnode is a device, return special vnode instead
1374 * (though it knows all about -us- via sp->s_realvp)
1375 */
1376 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1377 VN_RELE(vp);
1378 if (*vpp == NULLVP)
1379 rv = ENOSYS;
1380 break;
1381 default: /* most types are returned as is */
1382 *vpp = vp;
1383 break;
1384 }
1385 return (rv);
1386 }
1387
1388 /*
1389 * junction between devname and root file system, e.g. ufs
1390 */
1391 int
devname_backstore_lookup(struct sdev_node * ddv,char * nm,struct vnode ** rvp)1392 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1393 {
1394 struct vnode *rdvp = ddv->sdev_attrvp;
1395 int rval = 0;
1396
1397 ASSERT(rdvp);
1398
1399 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1400 NULL);
1401 return (rval);
1402 }
1403
1404 static int
sdev_filldir_from_store(struct sdev_node * ddv,int dlen,struct cred * cred)1405 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1406 {
1407 struct sdev_node *dv = NULL;
1408 char *nm;
1409 struct vnode *dirvp;
1410 int error;
1411 vnode_t *vp;
1412 int eof;
1413 struct iovec iov;
1414 struct uio uio;
1415 struct dirent64 *dp;
1416 dirent64_t *dbuf;
1417 size_t dbuflen;
1418 struct vattr vattr;
1419 char *link = NULL;
1420
1421 if (ddv->sdev_attrvp == NULL)
1422 return (0);
1423 if (!(ddv->sdev_flags & SDEV_BUILD))
1424 return (0);
1425
1426 dirvp = ddv->sdev_attrvp;
1427 VN_HOLD(dirvp);
1428 dbuf = kmem_zalloc(dlen, KM_SLEEP);
1429
1430 uio.uio_iov = &iov;
1431 uio.uio_iovcnt = 1;
1432 uio.uio_segflg = UIO_SYSSPACE;
1433 uio.uio_fmode = 0;
1434 uio.uio_extflg = UIO_COPY_CACHED;
1435 uio.uio_loffset = 0;
1436 uio.uio_llimit = MAXOFFSET_T;
1437
1438 eof = 0;
1439 error = 0;
1440 while (!error && !eof) {
1441 uio.uio_resid = dlen;
1442 iov.iov_base = (char *)dbuf;
1443 iov.iov_len = dlen;
1444 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1445 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1446 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1447
1448 dbuflen = dlen - uio.uio_resid;
1449 if (error || dbuflen == 0)
1450 break;
1451
1452 if (!(ddv->sdev_flags & SDEV_BUILD))
1453 break;
1454
1455 for (dp = dbuf; ((intptr_t)dp <
1456 (intptr_t)dbuf + dbuflen);
1457 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1458 nm = dp->d_name;
1459
1460 if (strcmp(nm, ".") == 0 ||
1461 strcmp(nm, "..") == 0)
1462 continue;
1463
1464 vp = NULLVP;
1465 dv = sdev_cache_lookup(ddv, nm);
1466 if (dv) {
1467 if (dv->sdev_state != SDEV_ZOMBIE) {
1468 SDEV_SIMPLE_RELE(dv);
1469 } else {
1470 /*
1471 * A ZOMBIE node may not have been
1472 * cleaned up from the backing store,
1473 * bypass this entry in this case,
1474 * and clean it up from the directory
1475 * cache if this is the last call.
1476 */
1477 (void) sdev_dirdelete(ddv, dv);
1478 }
1479 continue;
1480 }
1481
1482 /* refill the cache if not already */
1483 error = devname_backstore_lookup(ddv, nm, &vp);
1484 if (error)
1485 continue;
1486
1487 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1488 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1489 if (error)
1490 continue;
1491
1492 if (vattr.va_type == VLNK) {
1493 error = sdev_getlink(vp, &link);
1494 if (error) {
1495 continue;
1496 }
1497 ASSERT(link != NULL);
1498 }
1499
1500 if (!rw_tryupgrade(&ddv->sdev_contents)) {
1501 rw_exit(&ddv->sdev_contents);
1502 rw_enter(&ddv->sdev_contents, RW_WRITER);
1503 }
1504 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1505 cred, SDEV_READY);
1506 rw_downgrade(&ddv->sdev_contents);
1507
1508 if (link != NULL) {
1509 kmem_free(link, strlen(link) + 1);
1510 link = NULL;
1511 }
1512
1513 if (!error) {
1514 ASSERT(dv);
1515 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1516 SDEV_SIMPLE_RELE(dv);
1517 }
1518 vp = NULL;
1519 dv = NULL;
1520 }
1521 }
1522
1523 done:
1524 VN_RELE(dirvp);
1525 kmem_free(dbuf, dlen);
1526
1527 return (error);
1528 }
1529
1530 void
sdev_filldir_dynamic(struct sdev_node * ddv)1531 sdev_filldir_dynamic(struct sdev_node *ddv)
1532 {
1533 int error;
1534 int i;
1535 struct vattr vattr;
1536 struct vattr *vap = &vattr;
1537 char *nm = NULL;
1538 struct sdev_node *dv = NULL;
1539
1540 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1541 ASSERT((ddv->sdev_flags & SDEV_BUILD));
1542
1543 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */
1544 gethrestime(&vap->va_atime);
1545 vap->va_mtime = vap->va_atime;
1546 vap->va_ctime = vap->va_atime;
1547 for (i = 0; vtab[i].vt_name != NULL; i++) {
1548 /*
1549 * This early, we may be in a read-only /dev
1550 * environment: leave the creation of any nodes we'd
1551 * attempt to persist to devfsadm.
1552 */
1553 if (vtab[i].vt_flags & SDEV_PERSIST)
1554 continue;
1555 nm = vtab[i].vt_name;
1556 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1557 dv = NULL;
1558 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1559 NULL, kcred, SDEV_READY);
1560 if (error) {
1561 cmn_err(CE_WARN, "%s/%s: error %d\n",
1562 ddv->sdev_name, nm, error);
1563 } else {
1564 ASSERT(dv);
1565 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1566 SDEV_SIMPLE_RELE(dv);
1567 }
1568 }
1569 }
1570
1571 /*
1572 * Creating a backing store entry based on sdev_attr.
1573 * This is called either as part of node creation in a persistent directory
1574 * or from setattr/setsecattr to persist access attributes across reboot.
1575 */
int
sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
{
	int error = 0;
	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
	struct vattr *vap = dv->sdev_attr;
	char *nm = dv->sdev_name;
	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;

	ASSERT(dv && dv->sdev_name && rdvp);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);

lookup:
	/* try to find it in the backing store */
	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
	    NULL);
	if (error == 0) {
		/* prefer the underlying "real" vnode when one is exposed */
		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
			VN_HOLD(rrvp);
			VN_RELE(*rvp);
			*rvp = rrvp;
		}

		/* attributes now live in the backing store node */
		kmem_free(dv->sdev_attr, sizeof (vattr_t));
		dv->sdev_attr = NULL;
		dv->sdev_attrvp = *rvp;
		return (0);
	}

	/* let's try to persist the node */
	gethrestime(&vap->va_atime);
	vap->va_mtime = vap->va_atime;
	vap->va_ctime = vap->va_atime;
	vap->va_mask |= AT_TYPE|AT_MODE;
	switch (vap->va_type) {
	case VDIR:
		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
		    (void *)(*rvp), error));
		break;
	case VCHR:
	case VBLK:
	case VREG:
	case VDOOR:
		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
		    rvp, cred, 0, NULL, NULL);
		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
		    (void *)(*rvp), error));
		/* drop create's hold; the lookup above takes its own */
		if (!error)
			VN_RELE(*rvp);
		break;
	case VLNK:
		ASSERT(dv->sdev_symlink);
		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
		    NULL, 0);
		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
		    error));
		break;
	default:
		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
		    "create\n", nm);
		/*NOTREACHED*/
	}

	/* go back to lookup to factor out spec node and set attrvp */
	if (error == 0)
		goto lookup;

	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
	return (error);
}
1648
/*
 * Insert node *dv (named nm) into directory ddv's cache, resolving a
 * name collision with an existing entry: a live duplicate wins (the
 * new node is destroyed and *dv is pointed at the existing node); a
 * ZOMBIE duplicate is deleted first, unless it is still held (EBUSY),
 * in which case the insertion fails, *dv is destroyed and set to NULL.
 * Caller holds ddv->sdev_contents as writer.
 */
static int
sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
{
	int error = 0;
	struct sdev_node *dup = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
		/* no collision: enter the new node directly */
		sdev_direnter(ddv, *dv);
	} else {
		if (dup->sdev_state == SDEV_ZOMBIE) {
			error = sdev_dirdelete(ddv, dup);
			/*
			 * The ZOMBIE node is still hanging
			 * around with more than one reference counts.
			 * Fail the new node creation so that
			 * the directory cache won't have
			 * duplicate entries for the same named node
			 */
			if (error == EBUSY) {
				SDEV_SIMPLE_RELE(*dv);
				sdev_nodedestroy(*dv, 0);
				*dv = NULL;
				return (error);
			}
			sdev_direnter(ddv, *dv);
		} else {
			/* live duplicate: hand back the existing node */
			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(*dv);
			sdev_nodedestroy(*dv, 0);
			*dv = dup;
		}
	}

	/* note: a non-EBUSY sdev_dirdelete error is ignored here */
	return (0);
}
1685
1686 static int
sdev_cache_delete(struct sdev_node * ddv,struct sdev_node ** dv)1687 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1688 {
1689 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1690 return (sdev_dirdelete(ddv, *dv));
1691 }
1692
1693 /*
1694 * update the in-core directory cache
1695 */
1696 int
sdev_cache_update(struct sdev_node * ddv,struct sdev_node ** dv,char * nm,sdev_cache_ops_t ops)1697 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1698 sdev_cache_ops_t ops)
1699 {
1700 int error = 0;
1701
1702 ASSERT((SDEV_HELD(*dv)));
1703
1704 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1705 switch (ops) {
1706 case SDEV_CACHE_ADD:
1707 error = sdev_cache_add(ddv, dv, nm);
1708 break;
1709 case SDEV_CACHE_DELETE:
1710 error = sdev_cache_delete(ddv, dv);
1711 break;
1712 default:
1713 break;
1714 }
1715
1716 return (error);
1717 }
1718
1719 /*
1720 * retrieve the named entry from the directory cache
1721 */
1722 struct sdev_node *
sdev_cache_lookup(struct sdev_node * ddv,char * nm)1723 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1724 {
1725 struct sdev_node *dv = NULL;
1726
1727 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1728 dv = sdev_findbyname(ddv, nm);
1729
1730 return (dv);
1731 }
1732
1733 /*
1734 * Implicit reconfig for nodes constructed by a link generator
1735 * Start devfsadm if needed, or if devfsadm is in progress,
1736 * prepare to block on devfsadm either completing or
1737 * constructing the desired node. As devfsadmd is global
1738 * in scope, constructing all necessary nodes, we only
1739 * need to initiate it once.
1740 */
static int
sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
{
	int error = 0;

	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
		/* devfsadm already running: just block this lookup on dv */
		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
		    ddv->sdev_name, nm, devfsadm_state));
		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
		mutex_exit(&dv->sdev_lookup_lock);
		error = 0;
	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
		/* first time: start devfsadmd, then block on dv */
		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
		    ddv->sdev_name, nm, devfsadm_state));

		sdev_devfsadmd_thread(ddv, dv, kcred);
		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_BLOCK_OTHERS(dv,
		    (SDEV_LOOKUP | SDEV_LGWAITING));
		mutex_exit(&dv->sdev_lookup_lock);
		error = 0;
	} else {
		/* devfsadm has already run once; do not start it again */
		error = -1;
	}

	return (error);
}
1769
1770 /*
1771 * Support for specialized device naming construction mechanisms
1772 */
/*
 * Build a node via a directory-specific callback.  SDEV_VLINK asks the
 * callback for a target path and creates a symlink node from it;
 * SDEV_VATTR asks the callback to fill in a vattr (e.g. /dev/pts) and
 * creates the node from that.  Returns 0 with *dvp set on success, the
 * sdev_mknode error, or -1 on callback failure/unsupported flags.
 * Caller holds ddv->sdev_contents as writer.
 */
static int
sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
    int (*callback)(struct sdev_node *, char *, void **, struct cred *,
    void *, char *), int flags, struct cred *cred)
{
	int rv = 0;
	char *physpath = NULL;
	struct vattr vattr;
	struct vattr *vap = &vattr;
	struct sdev_node *dv = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	if (flags & SDEV_VLINK) {
		/* callback supplies the symlink target path */
		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
		    NULL);
		if (rv) {
			kmem_free(physpath, MAXPATHLEN);
			return (-1);
		}

		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
		vap->va_size = strlen(physpath);
		gethrestime(&vap->va_atime);
		vap->va_mtime = vap->va_atime;
		vap->va_ctime = vap->va_atime;

		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
		    (void *)physpath, cred, SDEV_READY);
		kmem_free(physpath, MAXPATHLEN);
		if (rv)
			return (rv);
	} else if (flags & SDEV_VATTR) {
		/*
		 * /dev/pts
		 *
		 * callback is responsible to set the basic attributes,
		 * e.g. va_type/va_uid/va_gid/
		 * dev_t if VCHR or VBLK/
		 */
		ASSERT(callback);
		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
		if (rv) {
			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
			    "callback failed \n"));
			return (-1);
		}

		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
		    cred, SDEV_READY);

		if (rv)
			return (rv);

	} else {
		impossible(("lookup: %s/%s by %s not supported (%d)\n",
		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
		    __LINE__));
		rv = -1;
	}

	*dvp = dv;
	return (rv);
}
1837
/*
 * Return 1 when the named executable is devfsadm, 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	return (strcmp(exec_name, "devfsadm") == 0 ? 1 : 0);
}
1850
1851 /*
1852 * Lookup Order:
1853 * sdev_node cache;
1854 * backing store (SDEV_PERSIST);
1855 * DBNR: a. dir_ops implemented in the loadable modules;
1856 * b. vnode ops in vtab.
1857 */
int
devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
    struct cred *, void *, char *), int flags)
{
	int rv = 0, nmlen;
	struct vnode *rvp = NULL;
	struct sdev_node *dv = NULL;
	int retried = 0;
	int error = 0;
	struct vattr vattr;
	char *lookup_thread = curproc->p_user.u_comm;
	int failed_flags = 0;
	int (*vtor)(struct sdev_node *) = NULL;
	int state;
	int parent_state;
	char *link = NULL;

	if (SDEVTOV(ddv)->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * Empty name or ., return node itself.
	 */
	nmlen = strlen(nm);
	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
		*vpp = SDEVTOV(ddv);
		VN_HOLD(*vpp);
		return (0);
	}

	/*
	 * .., return the parent directory
	 */
	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
		*vpp = SDEVTOV(ddv->sdev_dotdot);
		VN_HOLD(*vpp);
		return (0);
	}

	rw_enter(&ddv->sdev_contents, RW_READER);
	/* pick up the directory's node validator, if it declares one */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

tryagain:
	/*
	 * (a) directory cache lookup:
	 */
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	parent_state = ddv->sdev_state;
	dv = sdev_cache_lookup(ddv, nm);
	if (dv) {
		state = dv->sdev_state;
		switch (state) {
		case SDEV_INIT:
			/* node is mid-construction by another lookup */
			if (is_devfsadm_thread(lookup_thread))
				break;

			/* ZOMBIED parent won't allow node creation */
			if (parent_state == SDEV_ZOMBIE) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm,
				    retried);
				goto nolock_notfound;
			}

			mutex_enter(&dv->sdev_lookup_lock);
			/* compensate the threads started after devfsadm */
			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
			    !(SDEV_IS_LOOKUP(dv)))
				SDEV_BLOCK_OTHERS(dv,
				    (SDEV_LOOKUP | SDEV_LGWAITING));

			if (SDEV_IS_LOOKUP(dv)) {
				failed_flags |= SLF_REBUILT;
				/* drop the dir lock while waiting */
				rw_exit(&ddv->sdev_contents);
				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
				mutex_exit(&dv->sdev_lookup_lock);
				rw_enter(&ddv->sdev_contents, RW_READER);

				if (error != 0) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				}

				/* re-examine the node state after waiting */
				state = dv->sdev_state;
				if (state == SDEV_INIT) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				} else if (state == SDEV_READY) {
					goto found;
				} else if (state == SDEV_ZOMBIE) {
					rw_exit(&ddv->sdev_contents);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					SDEV_RELE(dv);
					goto lookup_failed;
				}
			} else {
				mutex_exit(&dv->sdev_lookup_lock);
			}
			break;
		case SDEV_READY:
			goto found;
		case SDEV_ZOMBIE:
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/*
	 * ZOMBIED parent does not allow new node creation.
	 * bail out early
	 */
	if (parent_state == SDEV_ZOMBIE) {
		rw_exit(&ddv->sdev_contents);
		*vpp = NULLVP;
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		return (ENOENT);
	}

	/*
	 * (b0): backing store lookup
	 *	SDEV_PERSIST is default except:
	 *		1) pts nodes
	 *		2) non-chmod'ed local nodes
	 *		3) zvol nodes
	 */
	if (SDEV_IS_PERSIST(ddv)) {
		error = devname_backstore_lookup(ddv, nm, &rvp);

		if (!error) {

			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
			if (error) {
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				sdev_lookup_failed(ddv, nm, failed_flags);
				*vpp = NULLVP;
				return (ENOENT);
			}

			/* symlink nodes also need the link target */
			if (vattr.va_type == VLNK) {
				error = sdev_getlink(rvp, &link);
				if (error) {
					rw_exit(&ddv->sdev_contents);
					if (dv)
						SDEV_RELE(dv);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					sdev_lookup_failed(ddv, nm,
					    failed_flags);
					*vpp = NULLVP;
					return (ENOENT);
				}
				ASSERT(link != NULL);
			}

			/* upgrade to writer; may drop/re-take the lock */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_mknode(ddv, nm, &dv, &vattr,
			    rvp, link, cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);

			if (link != NULL) {
				kmem_free(link, strlen(link) + 1);
				link = NULL;
			}

			if (error) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				goto lookup_failed;
			} else {
				goto found;
			}
		} else if (retried) {
			/* already retried once after devfsadm; give up */
			rw_exit(&ddv->sdev_contents);
			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
			    ddv->sdev_name, nm));
			if (dv)
				SDEV_RELE(dv);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}

lookup_create_node:
	/* first thread that is doing the lookup on this node */
	if (callback) {
		ASSERT(dv == NULL);
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		error = sdev_call_dircallback(ddv, &dv, nm, callback,
		    flags, cred);
		rw_downgrade(&ddv->sdev_contents);
		if (error == 0) {
			goto found;
		} else {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			rw_exit(&ddv->sdev_contents);
			goto lookup_failed;
		}
	}
	if (!dv) {
		/* create a placeholder node in SDEV_INIT state */
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
		    cred, SDEV_INIT);
		if (!dv) {
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
		rw_downgrade(&ddv->sdev_contents);
	}

	/*
	 * (b1) invoking devfsadm once per life time for devfsadm nodes
	 */
	ASSERT(SDEV_HELD(dv));

	if (SDEV_IS_NO_NCACHE(dv))
		failed_flags |= SLF_NO_NCACHE;
	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
		ASSERT(SDEV_HELD(dv));
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	/*
	 * filter out known non-existent devices recorded
	 * during initial reconfiguration boot for which
	 * reconfig should not be done and lookup may
	 * be short-circuited now.
	 */
	if (sdev_lookup_filter(ddv, nm)) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	/* bypassing devfsadm internal nodes */
	if (is_devfsadm_thread(lookup_thread)) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	if (sdev_reconfig_disable) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	error = sdev_call_devfsadmd(ddv, dv, nm);
	if (error == 0) {
		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
		    ddv->sdev_name, nm, curproc->p_user.u_comm));
		if (sdev_reconfig_verbose) {
			cmn_err(CE_CONT,
			    "?lookup of %s/%s by %s: reconfig\n",
			    ddv->sdev_name, nm, curproc->p_user.u_comm);
		}
		/* retry the whole lookup once after devfsadm runs */
		retried = 1;
		failed_flags |= SLF_REBUILT;
		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
		SDEV_SIMPLE_RELE(dv);
		goto tryagain;
	} else {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

found:
	/* node is ready: validate it (if required) and hand back its vnode */
	ASSERT(!(dv->sdev_flags & SDEV_STALE));
	ASSERT(dv->sdev_state == SDEV_READY);
	if (vtor) {
		/*
		 * Check validity of returned node
		 */
		switch (vtor(dv)) {
		case SDEV_VTOR_VALID:
			break;
		case SDEV_VTOR_STALE:
			/*
			 * The name exists, but the cache entry is
			 * stale and needs to be re-created.
			 */
			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_cache_update(ddv, &dv, nm,
			    SDEV_CACHE_DELETE);
			rw_downgrade(&ddv->sdev_contents);
			if (error == 0) {
				dv = NULL;
				goto lookup_create_node;
			}
			/* FALLTHRU */
		case SDEV_VTOR_INVALID:
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdcmn_err7(("lookup: destroy invalid "
			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
			goto nolock_notfound;
		case SDEV_VTOR_SKIP:
			sdcmn_err7(("lookup: node not applicable - "
			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			cmn_err(CE_PANIC,
			    "dev fs: validator failed: %s(%p)\n",
			    dv->sdev_name, (void *)dv);
			break;
		}
	}

	rw_exit(&ddv->sdev_contents);
	rv = sdev_to_vp(dv, vpp);
	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
	    dv->sdev_state, nm, rv));
	return (rv);

nolock_notfound:
	/*
	 * Destroy the node that is created for synchronization purposes.
	 */
	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
	    nm, dv->sdev_state));
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	if (dv->sdev_state == SDEV_INIT) {
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}

		/*
		 * Node state may have changed during the lock
		 * changes. Re-check.
		 */
		if (dv->sdev_state == SDEV_INIT) {
			(void) sdev_dirdelete(ddv, dv);
			rw_exit(&ddv->sdev_contents);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULL;
			return (ENOENT);
		}
	}

	rw_exit(&ddv->sdev_contents);
	SDEV_RELE(dv);

lookup_failed:
	sdev_lookup_failed(ddv, nm, failed_flags);
	*vpp = NULL;
	return (ENOENT);
}
2247
2248 /*
2249 * Given a directory node, mark all nodes beneath as
2250 * STALE, i.e. nodes that don't exist as far as new
2251 * consumers are concerned. Remove them from the
2252 * list of directory entries so that no lookup or
2253 * directory traversal will find them. The node
2254 * not deallocated so existing holds are not affected.
2255 */
void
sdev_stale(struct sdev_node *ddv)
{
	struct sdev_node *dv;
	struct vnode *vp;

	ASSERT(SDEVTOV(ddv)->v_type == VDIR);

	rw_enter(&ddv->sdev_contents, RW_WRITER);
	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
		vp = SDEVTOV(dv);
		/* depth-first: stale the subtree before this node */
		if (vp->v_type == VDIR)
			sdev_stale(dv);

		sdcmn_err9(("sdev_stale: setting stale %s\n",
		    dv->sdev_path));
		dv->sdev_flags |= SDEV_STALE;
		avl_remove(&ddv->sdev_entries, dv);
		/*
		 * NOTE(review): the loop advances via
		 * SDEV_NEXT_ENTRY(ddv, dv) after dv was removed from
		 * sdev_entries; assumes that is safe on a just-removed
		 * node -- verify.
		 */
	}
	/* force a rebuild of this directory on next access */
	ddv->sdev_flags |= SDEV_BUILD;
	rw_exit(&ddv->sdev_contents);
}
2278
2279 /*
2280 * Given a directory node, clean out all the nodes beneath.
2281 * If expr is specified, clean node with names matching expr.
2282 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2283 * so they are excluded from future lookups.
2284 */
int
sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
{
	int error = 0;
	int busy = 0;
	struct vnode *vp;
	struct sdev_node *dv, *next = NULL;
	int bkstore = 0;	/* 0: none, 1: remove file, 2: remove dir */
	int len = 0;
	char *bks_name = NULL;

	ASSERT(SDEVTOV(ddv)->v_type == VDIR);

	/*
	 * We try our best to destroy all unused sdev_node's
	 */
	rw_enter(&ddv->sdev_contents, RW_WRITER);
	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
		/* grab next first: dv may be deleted below */
		next = SDEV_NEXT_ENTRY(ddv, dv);
		vp = SDEVTOV(dv);

		/* when expr is given, only touch matching names */
		if (expr && gmatch(dv->sdev_name, expr) == 0)
			continue;

		/* recursively clean sub-directories first */
		if (vp->v_type == VDIR &&
		    sdev_cleandir(dv, NULL, flags) != 0) {
			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
			    dv->sdev_name));
			busy++;
			continue;
		}

		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
			    dv->sdev_name));
			busy++;
			continue;
		}

		/*
		 * at this point, either dv is not held or SDEV_ENFORCE
		 * is specified. In either case, dv needs to be deleted
		 */
		SDEV_HOLD(dv);

		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
		if (bkstore && (vp->v_type == VDIR))
			bkstore += 1;

		/* copy the name out; dv may be gone after dirdelete */
		if (bkstore) {
			len = strlen(dv->sdev_name) + 1;
			bks_name = kmem_alloc(len, KM_SLEEP);
			bcopy(dv->sdev_name, bks_name, len);
		}

		error = sdev_dirdelete(ddv, dv);

		if (error == EBUSY) {
			sdcmn_err9(("sdev_cleandir: dir busy\n"));
			busy++;
		}

		/* take care the backing store clean up */
		if (bkstore && (error == 0)) {
			ASSERT(bks_name);
			ASSERT(ddv->sdev_attrvp);

			if (bkstore == 1) {
				error = VOP_REMOVE(ddv->sdev_attrvp,
				    bks_name, kcred, NULL, 0);
			} else if (bkstore == 2) {
				error = VOP_RMDIR(ddv->sdev_attrvp,
				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
			}

			/* do not propagate the backing store errors */
			if (error) {
				sdcmn_err9(("sdev_cleandir: backing store"
				    "not cleaned\n"));
				error = 0;
			}

			bkstore = 0;
			kmem_free(bks_name, len);
			bks_name = NULL;
			len = 0;
		}
	}

	/* directory contents changed; rebuild on next access */
	ddv->sdev_flags |= SDEV_BUILD;
	rw_exit(&ddv->sdev_contents);

	if (busy) {
		error = EBUSY;
	}

	return (error);
}
2383
2384 /*
2385 * a convenient wrapper for readdir() funcs
2386 */
2387 size_t
add_dir_entry(dirent64_t * de,char * nm,size_t size,ino_t ino,offset_t off)2388 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2389 {
2390 size_t reclen = DIRENT64_RECLEN(strlen(nm));
2391 if (reclen > size)
2392 return (0);
2393
2394 de->d_ino = (ino64_t)ino;
2395 de->d_off = (off64_t)off + 1;
2396 de->d_reclen = (ushort_t)reclen;
2397 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2398 return (reclen);
2399 }
2400
2401 /*
2402 * sdev_mount service routines
2403 */
2404 int
sdev_copyin_mountargs(struct mounta * uap,struct sdev_mountargs * args)2405 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2406 {
2407 int error;
2408
2409 if (uap->datalen != sizeof (*args))
2410 return (EINVAL);
2411
2412 if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2413 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2414 "get user data. error %d\n", error);
2415 return (EFAULT);
2416 }
2417
2418 return (0);
2419 }
2420
#ifdef nextdp
#undef nextdp
#endif
/* advance to the next dirent64 record within a readdir output buffer */
#define nextdp(dp) ((struct dirent64 *) \
	(intptr_t)((char *)(dp) + (dp)->d_reclen))
2426
2427 /*
2428 * readdir helper func
2429 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t *dp;
	ulong_t outcount = 0;
	size_t namelen;
	ulong_t alloc_count;
	void *outbuf;
	struct iovec *iovp;
	int error = 0;
	size_t reclen;
	offset_t diroff;	/* logical index of the entry being emitted */
	offset_t soff;		/* starting offset requested by the caller */
	int this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	/* caller must already hold the directory contents lock as reader */
	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* offsets at or past MAXOFF_T mean the directory is exhausted */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* fetch the per-directory node validator, if one is registered */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	/* stage entries in a kernel buffer; uiomove'd out at the end */
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a ZOMBIE directory only returns whatever is already cached */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {

		/*
		 * Trigger a devfsadm reconfiguration only when the system
		 * has finished booting, devfsadm has neither run nor is
		 * running, reconfiguration is not disabled, and this
		 * directory is a browsable, non-dynamic, cacheable one.
		 */
		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		if (ddv->sdev_flags & SDEV_BUILD) {
			/* repopulate cache from the persistent backing store */
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		/* ".." — uses the parent directory's inode number */
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/* bypassing pre-matured nodes */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check validity of node
		 * Drop invalid and nodes to be skipped.
		 * A node the validator indicates as stale needs
		 * to be returned as presumably the node name itself
		 * is valid and the node data itself will be refreshed
		 * on lookup. An application performing a readdir then
		 * stat on each entry should thus always see consistent
		 * data. In any case, it is not possible to synchronize
		 * with dynamic kernel state, and any view we return can
		 * never be anything more than a snapshot at a point in time.
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			case SDEV_VTOR_STALE:
				sdcmn_err3(("sdev_readir: %s stale\n",
				    dv->sdev_name));
				break;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
				/*NOTREACHED*/
			}
		}

		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		/* stop when the staging buffer is full; offset resumes later */
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	/* copy the staged entries out to the caller's iovec */
	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		uiop->uio_loffset = diroff;
		/* dv == NULL means the entry walk completed: report EOF */
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/* best-effort access/change time update on the backing store */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}
2668
2669 static int
sdev_modctl_lookup(const char * path,vnode_t ** r_vp)2670 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2671 {
2672 vnode_t *vp;
2673 vnode_t *cvp;
2674 struct sdev_node *svp;
2675 char *nm;
2676 struct pathname pn;
2677 int error;
2678 int persisted = 0;
2679
2680 ASSERT(INGLOBALZONE(curproc));
2681
2682 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2683 return (error);
2684 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2685
2686 vp = rootdir;
2687 VN_HOLD(vp);
2688
2689 while (pn_pathleft(&pn)) {
2690 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2691 (void) pn_getcomponent(&pn, nm);
2692
2693 /*
2694 * Deal with the .. special case where we may be
2695 * traversing up across a mount point, to the
2696 * root of this filesystem or global root.
2697 */
2698 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2699 checkforroot:
2700 if (VN_CMP(vp, rootdir)) {
2701 nm[1] = 0;
2702 } else if (vp->v_flag & VROOT) {
2703 vfs_t *vfsp;
2704 cvp = vp;
2705 vfsp = cvp->v_vfsp;
2706 vfs_rlock_wait(vfsp);
2707 vp = cvp->v_vfsp->vfs_vnodecovered;
2708 if (vp == NULL ||
2709 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2710 vfs_unlock(vfsp);
2711 VN_RELE(cvp);
2712 error = EIO;
2713 break;
2714 }
2715 VN_HOLD(vp);
2716 vfs_unlock(vfsp);
2717 VN_RELE(cvp);
2718 cvp = NULL;
2719 goto checkforroot;
2720 }
2721 }
2722
2723 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2724 NULL, NULL);
2725 if (error) {
2726 VN_RELE(vp);
2727 break;
2728 }
2729
2730 /* traverse mount points encountered on our journey */
2731 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2732 VN_RELE(vp);
2733 VN_RELE(cvp);
2734 break;
2735 }
2736
2737 /*
2738 * symbolic link, can be either relative and absolute
2739 */
2740 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2741 struct pathname linkpath;
2742 pn_alloc(&linkpath);
2743 if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2744 pn_free(&linkpath);
2745 break;
2746 }
2747 if (pn_pathleft(&linkpath) == 0)
2748 (void) pn_set(&linkpath, ".");
2749 error = pn_insert(&pn, &linkpath, strlen(nm));
2750 pn_free(&linkpath);
2751 if (pn.pn_pathlen == 0) {
2752 VN_RELE(vp);
2753 return (ENOENT);
2754 }
2755 if (pn.pn_path[0] == '/') {
2756 pn_skipslash(&pn);
2757 VN_RELE(vp);
2758 VN_RELE(cvp);
2759 vp = rootdir;
2760 VN_HOLD(vp);
2761 } else {
2762 VN_RELE(cvp);
2763 }
2764 continue;
2765 }
2766
2767 VN_RELE(vp);
2768
2769 /*
2770 * Direct the operation to the persisting filesystem
2771 * underlying /dev. Bail if we encounter a
2772 * non-persistent dev entity here.
2773 */
2774 if (cvp->v_vfsp->vfs_fstype == devtype) {
2775
2776 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2777 error = ENOENT;
2778 VN_RELE(cvp);
2779 break;
2780 }
2781
2782 if (VTOSDEV(cvp) == NULL) {
2783 error = ENOENT;
2784 VN_RELE(cvp);
2785 break;
2786 }
2787 svp = VTOSDEV(cvp);
2788 if ((vp = svp->sdev_attrvp) == NULL) {
2789 error = ENOENT;
2790 VN_RELE(cvp);
2791 break;
2792 }
2793 persisted = 1;
2794 VN_HOLD(vp);
2795 VN_RELE(cvp);
2796 cvp = vp;
2797 }
2798
2799 vp = cvp;
2800 pn_skipslash(&pn);
2801 }
2802
2803 kmem_free(nm, MAXNAMELEN);
2804 pn_free(&pn);
2805
2806 if (error)
2807 return (error);
2808
2809 /*
2810 * Only return persisted nodes in the filesystem underlying /dev.
2811 */
2812 if (!persisted) {
2813 VN_RELE(vp);
2814 return (ENOENT);
2815 }
2816
2817 *r_vp = vp;
2818 return (0);
2819 }
2820
2821 int
sdev_modctl_readdir(const char * dir,char *** dirlistp,int * npathsp,int * npathsp_alloc,int checking_empty)2822 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2823 int *npathsp, int *npathsp_alloc, int checking_empty)
2824 {
2825 char **pathlist = NULL;
2826 char **newlist = NULL;
2827 int npaths = 0;
2828 int npaths_alloc = 0;
2829 dirent64_t *dbuf = NULL;
2830 int n;
2831 char *s;
2832 int error;
2833 vnode_t *vp;
2834 int eof;
2835 struct iovec iov;
2836 struct uio uio;
2837 struct dirent64 *dp;
2838 size_t dlen;
2839 size_t dbuflen;
2840 int ndirents = 64;
2841 char *nm;
2842
2843 error = sdev_modctl_lookup(dir, &vp);
2844 sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2845 dir, curproc->p_user.u_comm,
2846 (error == 0) ? "ok" : "failed"));
2847 if (error)
2848 return (error);
2849
2850 dlen = ndirents * (sizeof (*dbuf));
2851 dbuf = kmem_alloc(dlen, KM_SLEEP);
2852
2853 uio.uio_iov = &iov;
2854 uio.uio_iovcnt = 1;
2855 uio.uio_segflg = UIO_SYSSPACE;
2856 uio.uio_fmode = 0;
2857 uio.uio_extflg = UIO_COPY_CACHED;
2858 uio.uio_loffset = 0;
2859 uio.uio_llimit = MAXOFFSET_T;
2860
2861 eof = 0;
2862 error = 0;
2863 while (!error && !eof) {
2864 uio.uio_resid = dlen;
2865 iov.iov_base = (char *)dbuf;
2866 iov.iov_len = dlen;
2867
2868 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2869 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2870 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2871
2872 dbuflen = dlen - uio.uio_resid;
2873
2874 if (error || dbuflen == 0)
2875 break;
2876
2877 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2878 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2879
2880 nm = dp->d_name;
2881
2882 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2883 continue;
2884 if (npaths == npaths_alloc) {
2885 npaths_alloc += 64;
2886 newlist = (char **)
2887 kmem_zalloc((npaths_alloc + 1) *
2888 sizeof (char *), KM_SLEEP);
2889 if (pathlist) {
2890 bcopy(pathlist, newlist,
2891 npaths * sizeof (char *));
2892 kmem_free(pathlist,
2893 (npaths + 1) * sizeof (char *));
2894 }
2895 pathlist = newlist;
2896 }
2897 n = strlen(nm) + 1;
2898 s = kmem_alloc(n, KM_SLEEP);
2899 bcopy(nm, s, n);
2900 pathlist[npaths++] = s;
2901 sdcmn_err11((" %s/%s\n", dir, s));
2902
2903 /* if checking empty, one entry is as good as many */
2904 if (checking_empty) {
2905 eof = 1;
2906 break;
2907 }
2908 }
2909 }
2910
2911 exit:
2912 VN_RELE(vp);
2913
2914 if (dbuf)
2915 kmem_free(dbuf, dlen);
2916
2917 if (error)
2918 return (error);
2919
2920 *dirlistp = pathlist;
2921 *npathsp = npaths;
2922 *npathsp_alloc = npaths_alloc;
2923
2924 return (0);
2925 }
2926
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int idx;

	/* release each name string, then the (npaths_alloc + 1)-slot vector */
	for (idx = 0; idx < npaths; idx++)
		kmem_free(pathlist[idx], strlen(pathlist[idx]) + 1);

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2939
2940 int
sdev_modctl_devexists(const char * path)2941 sdev_modctl_devexists(const char *path)
2942 {
2943 vnode_t *vp;
2944 int error;
2945
2946 error = sdev_modctl_lookup(path, &vp);
2947 sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2948 path, curproc->p_user.u_comm,
2949 (error == 0) ? "ok" : "failed"));
2950 if (error == 0)
2951 VN_RELE(vp);
2952
2953 return (error);
2954 }
2955
2956 extern int sdev_vnodeops_tbl_size;
2957
2958 /*
2959 * construct a new template with overrides from vtab
2960 */
2961 static fs_operation_def_t *
sdev_merge_vtab(const fs_operation_def_t tab[])2962 sdev_merge_vtab(const fs_operation_def_t tab[])
2963 {
2964 fs_operation_def_t *new;
2965 const fs_operation_def_t *tab_entry;
2966
2967 /* make a copy of standard vnode ops table */
2968 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2969 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2970
2971 /* replace the overrides from tab */
2972 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2973 fs_operation_def_t *std_entry = new;
2974 while (std_entry->name) {
2975 if (strcmp(tab_entry->name, std_entry->name) == 0) {
2976 std_entry->func = tab_entry->func;
2977 break;
2978 }
2979 std_entry++;
2980 }
2981 if (std_entry->name == NULL)
2982 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2983 tab_entry->name);
2984 }
2985
2986 return (new);
2987 }
2988
/* free memory allocated by sdev_merge_vtab */
static void
sdev_free_vtab(fs_operation_def_t *new)
{
	/* merged tables are always sdev_vnodeops_tbl_size bytes */
	kmem_free(new, sdev_vnodeops_tbl_size);
}
2995
2996 /*
2997 * a generic setattr() function
2998 *
2999 * note: flags only supports AT_UID and AT_GID.
3000 * Future enhancements can be done for other types, e.g. AT_MODE
3001 */
int
devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
    struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
    int), int protocol)
{
	struct sdev_node *dv = VTOSDEV(vp);
	struct sdev_node *parent = dv->sdev_dotdot;
	struct vattr *get;
	uint_t mask = vap->va_mask;
	int error;

	/* some sanity checks */
	if (vap->va_mask & AT_NOSET)
		return (EINVAL);

	/* size changes make no sense on a directory */
	if (vap->va_mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			return (EISDIR);
		}
	}

	/* no need to set attribute, but do not fail either */
	ASSERT(parent);
	/* parent lock taken first; node lock (if needed) nests inside */
	rw_enter(&parent->sdev_contents, RW_READER);
	if (dv->sdev_state == SDEV_ZOMBIE) {
		rw_exit(&parent->sdev_contents);
		return (0);
	}

	/* If backing store exists, just set it. */
	if (dv->sdev_attrvp) {
		rw_exit(&parent->sdev_contents);
		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
	}

	/*
	 * Otherwise, for nodes with the persistence attribute, create it.
	 */
	ASSERT(dv->sdev_attr);
	if (SDEV_IS_PERSIST(dv) ||
	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
		/* materialize a backing store node, then set attrs on it */
		sdev_vattr_merge(dv, vap);
		rw_enter(&dv->sdev_contents, RW_WRITER);
		error = sdev_shadow_node(dv, cred);
		rw_exit(&dv->sdev_contents);
		rw_exit(&parent->sdev_contents);

		if (error)
			return (error);
		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
	}


	/*
	 * sdev_attr was allocated in sdev_mknode
	 */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	/* permission check against the in-core attributes */
	error = secpolicy_vnode_setattr(cred, vp, vap,
	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
	if (error) {
		rw_exit(&dv->sdev_contents);
		rw_exit(&parent->sdev_contents);
		return (error);
	}

	get = dv->sdev_attr;
	/* new mode: keep the file-type bits, take permission bits from vap */
	if (mask & AT_MODE) {
		get->va_mode &= S_IFMT;
		get->va_mode |= vap->va_mode & ~S_IFMT;
	}

	if ((mask & AT_UID) || (mask & AT_GID)) {
		if (mask & AT_UID)
			get->va_uid = vap->va_uid;
		if (mask & AT_GID)
			get->va_gid = vap->va_gid;
		/*
		 * a callback must be provided if the protocol is set
		 */
		if ((protocol & AT_UID) || (protocol & AT_GID)) {
			ASSERT(callback);
			error = callback(dv, get, protocol);
			if (error) {
				rw_exit(&dv->sdev_contents);
				rw_exit(&parent->sdev_contents);
				return (error);
			}
		}
	}

	if (mask & AT_ATIME)
		get->va_atime = vap->va_atime;
	if (mask & AT_MTIME)
		get->va_mtime = vap->va_mtime;
	/* any ownership/mode change also bumps ctime */
	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
		gethrestime(&get->va_ctime);
	}

	sdev_vattr_merge(dv, get);
	rw_exit(&dv->sdev_contents);
	rw_exit(&parent->sdev_contents);
	return (0);
}
3105
3106 /*
3107 * a generic inactive() function
3108 */
/*ARGSUSED*/
void
devname_inactive_func(struct vnode *vp, struct cred *cred,
    void (*callback)(struct vnode *))
{
	int clean;
	struct sdev_node *dv = VTOSDEV(vp);
	struct sdev_node *ddv = dv->sdev_dotdot;
	int state;

	/* parent contents lock serializes with directory operations */
	rw_enter(&ddv->sdev_contents, RW_WRITER);
	state = dv->sdev_state;

	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);

	/* give the caller a chance at extra teardown on the last hold */
	if (vp->v_count == 1 && callback != NULL)
		callback(vp);

	/* only destroy when this is the last hold on a ZOMBIE node */
	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);

	/*
	 * last ref count on the ZOMBIE node is released.
	 * clean up the sdev_node, and
	 * release the hold on the backing store node so that
	 * the ZOMBIE backing stores also cleaned out.
	 */
	if (clean) {
		ASSERT(ddv);

		/* drop the parent's link count for this entry */
		ddv->sdev_nlink--;
		if (vp->v_type == VDIR) {
			/* presumably the extra link for "." — confirm */
			dv->sdev_nlink--;
		}
		/* stale nodes were already removed from the parent's AVL */
		if ((dv->sdev_flags & SDEV_STALE) == 0)
			avl_remove(&ddv->sdev_entries, dv);
		dv->sdev_nlink--;
		/* drop v_count under v_lock, then free outside the mutex */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		sdev_nodedestroy(dv, 0);
	} else {
		--vp->v_count;
		mutex_exit(&vp->v_lock);
	}
	rw_exit(&ddv->sdev_contents);
}
3155