1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29 /*
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
33 *
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
37 */
38
39 #include <sys/types.h>
40 #include <sys/param.h>
41 #include <sys/t_lock.h>
42 #include <sys/errno.h>
43 #include <sys/cred.h>
44 #include <sys/user.h>
45 #include <sys/uio.h>
46 #include <sys/file.h>
47 #include <sys/pathname.h>
48 #include <sys/vfs.h>
49 #include <sys/vfs_opreg.h>
50 #include <sys/vnode.h>
51 #include <sys/rwstlock.h>
52 #include <sys/fem.h>
53 #include <sys/stat.h>
54 #include <sys/mode.h>
55 #include <sys/conf.h>
56 #include <sys/sysmacros.h>
57 #include <sys/cmn_err.h>
58 #include <sys/systm.h>
59 #include <sys/kmem.h>
60 #include <sys/debug.h>
61 #include <c2/audit.h>
62 #include <sys/acl.h>
63 #include <sys/nbmlock.h>
64 #include <sys/fcntl.h>
65 #include <fs/fs_subr.h>
66 #include <sys/taskq.h>
67 #include <fs/fs_reparse.h>
68
69 /* Determine if this vnode is a file that is read-only */
70 #define ISROFILE(vp) \
71 ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
72 (vp)->v_type != VFIFO && vn_is_readonly(vp))
73
74 /* Tunable via /etc/system; used only by admin/install */
75 int nfs_global_client_only;
76
77 /*
78 * Array of vopstats_t for per-FS-type vopstats. This array has the same
79 * number of entries as and parallel to the vfssw table. (Arguably, it could
80 * be part of the vfssw table.) Once it's initialized, it's accessed using
81 * the same fstype index that is used to index into the vfssw table.
82 */
83 vopstats_t **vopstats_fstype;
84
85 /* vopstats initialization template used for fast initialization via bcopy() */
86 static vopstats_t *vs_templatep;
87
88 /* Kmem cache handle for vsk_anchor_t allocations */
89 kmem_cache_t *vsk_anchor_cache;
90
91 /* file events cleanup routine */
92 extern void free_fopdata(vnode_t *);
93
94 /*
95 * Root of AVL tree for the kstats associated with vopstats. Lock protects
96 * updates to vskstat_tree.
97 */
98 avl_tree_t vskstat_tree;
99 kmutex_t vskstat_tree_lock;
100
101 /* Global variable which enables/disables the vopstats collection */
102 int vopstats_enabled = 1;
103
104 /*
105 * forward declarations for internal vnode specific data (vsd)
106 */
107 static void *vsd_realloc(void *, size_t, size_t);
108
109 /*
110 * forward declarations for reparse point functions
111 */
112 static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
113
114 /*
115 * VSD -- VNODE SPECIFIC DATA
116 * The v_data pointer is typically used by a file system to store a
117 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
118 * However, there are times when additional project private data needs
119 * to be stored separately from the data (node) pointed to by v_data.
120 * This additional data could be stored by the file system itself or
121 * by a completely different kernel entity. VSD provides a way for
122 * callers to obtain a key and store a pointer to private data associated
123 * with a vnode.
124 *
125 * Callers are responsible for protecting the vsd by holding v_vsd_lock
126 * for calls to vsd_set() and vsd_get().
127 */
128
129 /*
130 * vsd_lock protects:
131 * vsd_nkeys - creation and deletion of vsd keys
132 * vsd_list - insertion and deletion of vsd_node in the vsd_list
133 * vsd_destructor - adding and removing destructors to the list
134 */
135 static kmutex_t vsd_lock;
136 static uint_t vsd_nkeys; /* size of destructor array */
137 /* list of vsd_node's */
138 static list_t *vsd_list = NULL;
139 /* per-key destructor funcs */
140 static void (**vsd_destructor)(void *);
141
/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() are near duplicates; the only difference is that
 * the latter also records the bytes transferred.  Consequently any change
 * to one may need to be reflected in the other.
 *
 * Macro arguments:
 *	vp		- Pointer to the vnode
 *	counter		- Partial name of the vopstats member to count in;
 *			  the macro prepends "n" (e.g. open -> nopen)
 *	bytecounter	- Name of the vopstats member that accumulates bytes
 *	bytesval	- Value to add to bytecounter
 *
 * Locals declared by the macros:
 *	vfsp		- The vnode's vfs (vp->v_vfsp)
 *	vsp		- First the per-vfs vopstats (vfs_vopstats), then the
 *			  per-fstype vopstats (vfs_fstypevsp) if non-NULL
 *	stataddr	- Address of the per-vfs counter; handed to the
 *			  fsinfo DTrace probe before the counter is bumped
 *
 * Nothing is recorded unless the vnode has a vfs with a vfs_implp and
 * VFS_STATS set, and the vnode's type is not VBAD.
 *
 * The vp and bytesval arguments are expanded more than once, so callers
 * must not pass expressions with side effects.  The macros expand to a
 * brace-enclosed block and are meant to be used as a statement.
 */

#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter((vp), 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}

#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter((vp), (bytesval),	\
		    stataddr);						\
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += (bytesval);		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += (bytesval);	\
		}							\
	}								\
}
191
/*
 * Replace cr with crgetmapped(cr) when the vnode's vfs does not have
 * VFS_XID set, i.e. when the filesystem does not support XIDs.  Nothing
 * is done when the vnode has no vfs.
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr) {						\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && !(vfsp->vfs_flag & VFS_XID)) {		\
		cr = crgetmapped(cr);					\
	}								\
}
201
202 /*
203 * Convert stat(2) formats to vnode types and vice versa. (Knows about
204 * numerical order of S_IFMT and vnode types.)
205 */
206 enum vtype iftovt_tab[] = {
207 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
208 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
209 };
210
211 ushort_t vttoif_tab[] = {
212 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
213 S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
214 };
215
216 /*
217 * The system vnode cache.
218 */
219
220 kmem_cache_t *vn_cache;
221
222
223 /*
224 * Vnode operations vector.
225 */
226
227 static const fs_operation_trans_def_t vn_ops_table[] = {
228 VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
229 fs_nosys, fs_nosys,
230
231 VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
232 fs_nosys, fs_nosys,
233
234 VOPNAME_READ, offsetof(struct vnodeops, vop_read),
235 fs_nosys, fs_nosys,
236
237 VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
238 fs_nosys, fs_nosys,
239
240 VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
241 fs_nosys, fs_nosys,
242
243 VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
244 fs_setfl, fs_nosys,
245
246 VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
247 fs_nosys, fs_nosys,
248
249 VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
250 fs_nosys, fs_nosys,
251
252 VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
253 fs_nosys, fs_nosys,
254
255 VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
256 fs_nosys, fs_nosys,
257
258 VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
259 fs_nosys, fs_nosys,
260
261 VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
262 fs_nosys, fs_nosys,
263
264 VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
265 fs_nosys, fs_nosys,
266
267 VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
268 fs_nosys, fs_nosys,
269
270 VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
271 fs_nosys, fs_nosys,
272
273 VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
274 fs_nosys, fs_nosys,
275
276 VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
277 fs_nosys, fs_nosys,
278
279 VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
280 fs_nosys, fs_nosys,
281
282 VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
283 fs_nosys, fs_nosys,
284
285 VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
286 fs_nosys, fs_nosys,
287
288 VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
289 fs_nosys, fs_nosys,
290
291 VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
292 fs_nosys, fs_nosys,
293
294 VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
295 fs_rwlock, fs_rwlock,
296
297 VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
298 (fs_generic_func_p) fs_rwunlock,
299 (fs_generic_func_p) fs_rwunlock, /* no errors allowed */
300
301 VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
302 fs_nosys, fs_nosys,
303
304 VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
305 fs_cmp, fs_cmp, /* no errors allowed */
306
307 VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
308 fs_frlock, fs_nosys,
309
310 VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
311 fs_nosys, fs_nosys,
312
313 VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
314 fs_nosys, fs_nosys,
315
316 VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
317 fs_nosys, fs_nosys,
318
319 VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
320 fs_nosys, fs_nosys,
321
322 VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
323 (fs_generic_func_p) fs_nosys_map,
324 (fs_generic_func_p) fs_nosys_map,
325
326 VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
327 (fs_generic_func_p) fs_nosys_addmap,
328 (fs_generic_func_p) fs_nosys_addmap,
329
330 VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
331 fs_nosys, fs_nosys,
332
333 VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
334 (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
335
336 VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
337 fs_nosys, fs_nosys,
338
339 VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
340 fs_pathconf, fs_nosys,
341
342 VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
343 fs_nosys, fs_nosys,
344
345 VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
346 fs_nosys, fs_nosys,
347
348 VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
349 (fs_generic_func_p) fs_dispose,
350 (fs_generic_func_p) fs_nodispose,
351
352 VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
353 fs_nosys, fs_nosys,
354
355 VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
356 fs_fab_acl, fs_nosys,
357
358 VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
359 fs_shrlock, fs_nosys,
360
361 VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
362 (fs_generic_func_p) fs_vnevent_nosupport,
363 (fs_generic_func_p) fs_vnevent_nosupport,
364
365 VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
366 fs_nosys, fs_nosys,
367
368 VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
369 fs_nosys, fs_nosys,
370
371 NULL, 0, NULL, NULL
372 };
373
374 /* Extensible attribute (xva) routines. */
375
376 /*
377 * Zero out the structure, set the size of the requested/returned bitmaps,
378 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
379 * to the returned attributes array.
380 */
381 void
xva_init(xvattr_t * xvap)382 xva_init(xvattr_t *xvap)
383 {
384 bzero(xvap, sizeof (xvattr_t));
385 xvap->xva_mapsize = XVA_MAPSIZE;
386 xvap->xva_magic = XVA_MAGIC;
387 xvap->xva_vattr.va_mask = AT_XVATTR;
388 xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
389 }
390
391 /*
392 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
393 * structure. Otherwise, returns NULL.
394 */
395 xoptattr_t *
xva_getxoptattr(xvattr_t * xvap)396 xva_getxoptattr(xvattr_t *xvap)
397 {
398 xoptattr_t *xoap = NULL;
399 if (xvap->xva_vattr.va_mask & AT_XVATTR)
400 xoap = &xvap->xva_xoptattrs;
401 return (xoap);
402 }
403
404 /*
405 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
406 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
407 * kstat name.
408 */
409 static int
vska_compar(const void * n1,const void * n2)410 vska_compar(const void *n1, const void *n2)
411 {
412 int ret;
413 ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
414 ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
415
416 if (p1 < p2) {
417 ret = -1;
418 } else if (p1 > p2) {
419 ret = 1;
420 } else {
421 ret = 0;
422 }
423
424 return (ret);
425 }
426
427 /*
428 * Used to create a single template which will be bcopy()ed to a newly
429 * allocated vsanchor_combo_t structure in new_vsanchor(), below.
430 */
431 static vopstats_t *
create_vopstats_template()432 create_vopstats_template()
433 {
434 vopstats_t *vsp;
435
436 vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
437 bzero(vsp, sizeof (*vsp)); /* Start fresh */
438
439 /* VOP_OPEN */
440 kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
441 /* VOP_CLOSE */
442 kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
443 /* VOP_READ I/O */
444 kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
445 kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
446 /* VOP_WRITE I/O */
447 kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
448 kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
449 /* VOP_IOCTL */
450 kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
451 /* VOP_SETFL */
452 kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
453 /* VOP_GETATTR */
454 kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
455 /* VOP_SETATTR */
456 kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
457 /* VOP_ACCESS */
458 kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
459 /* VOP_LOOKUP */
460 kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
461 /* VOP_CREATE */
462 kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
463 /* VOP_REMOVE */
464 kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
465 /* VOP_LINK */
466 kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
467 /* VOP_RENAME */
468 kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
469 /* VOP_MKDIR */
470 kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
471 /* VOP_RMDIR */
472 kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
473 /* VOP_READDIR I/O */
474 kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
475 kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
476 KSTAT_DATA_UINT64);
477 /* VOP_SYMLINK */
478 kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
479 /* VOP_READLINK */
480 kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
481 /* VOP_FSYNC */
482 kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
483 /* VOP_INACTIVE */
484 kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
485 /* VOP_FID */
486 kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
487 /* VOP_RWLOCK */
488 kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
489 /* VOP_RWUNLOCK */
490 kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
491 /* VOP_SEEK */
492 kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
493 /* VOP_CMP */
494 kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
495 /* VOP_FRLOCK */
496 kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
497 /* VOP_SPACE */
498 kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
499 /* VOP_REALVP */
500 kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
501 /* VOP_GETPAGE */
502 kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
503 /* VOP_PUTPAGE */
504 kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
505 /* VOP_MAP */
506 kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
507 /* VOP_ADDMAP */
508 kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
509 /* VOP_DELMAP */
510 kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
511 /* VOP_POLL */
512 kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
513 /* VOP_DUMP */
514 kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
515 /* VOP_PATHCONF */
516 kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
517 /* VOP_PAGEIO */
518 kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
519 /* VOP_DUMPCTL */
520 kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
521 /* VOP_DISPOSE */
522 kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
523 /* VOP_SETSECATTR */
524 kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
525 /* VOP_GETSECATTR */
526 kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
527 /* VOP_SHRLOCK */
528 kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
529 /* VOP_VNEVENT */
530 kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
531 /* VOP_REQZCBUF */
532 kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
533 /* VOP_RETZCBUF */
534 kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
535
536 return (vsp);
537 }
538
539 /*
540 * Creates a kstat structure associated with a vopstats structure.
541 */
542 kstat_t *
new_vskstat(char * ksname,vopstats_t * vsp)543 new_vskstat(char *ksname, vopstats_t *vsp)
544 {
545 kstat_t *ksp;
546
547 if (!vopstats_enabled) {
548 return (NULL);
549 }
550
551 ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
552 sizeof (vopstats_t)/sizeof (kstat_named_t),
553 KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
554 if (ksp) {
555 ksp->ks_data = vsp;
556 kstat_install(ksp);
557 }
558
559 return (ksp);
560 }
561
562 /*
563 * Called from vfsinit() to initialize the support mechanisms for vopstats
564 */
565 void
vopstats_startup()566 vopstats_startup()
567 {
568 if (!vopstats_enabled)
569 return;
570
571 /*
572 * Creates the AVL tree which holds per-vfs vopstat anchors. This
573 * is necessary since we need to check if a kstat exists before we
574 * attempt to create it. Also, initialize its lock.
575 */
576 avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
577 offsetof(vsk_anchor_t, vsk_node));
578 mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
579
580 vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
581 sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
582 NULL, NULL, 0);
583
584 /*
585 * Set up the array of pointers for the vopstats-by-FS-type.
586 * The entries will be allocated/initialized as each file system
587 * goes through modload/mod_installfs.
588 */
589 vopstats_fstype = (vopstats_t **)kmem_zalloc(
590 (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
591
592 /* Set up the global vopstats initialization template */
593 vs_templatep = create_vopstats_template();
594 }
595
596 /*
597 * We need to have the all of the counters zeroed.
598 * The initialization of the vopstats_t includes on the order of
599 * 50 calls to kstat_named_init(). Rather that do that on every call,
600 * we do it once in a template (vs_templatep) then bcopy it over.
601 */
602 void
initialize_vopstats(vopstats_t * vsp)603 initialize_vopstats(vopstats_t *vsp)
604 {
605 if (vsp == NULL)
606 return;
607
608 bcopy(vs_templatep, vsp, sizeof (vopstats_t));
609 }
610
611 /*
612 * If possible, determine which vopstats by fstype to use and
613 * return a pointer to the caller.
614 */
615 vopstats_t *
get_fstype_vopstats(vfs_t * vfsp,struct vfssw * vswp)616 get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
617 {
618 int fstype = 0; /* Index into vfssw[] */
619 vopstats_t *vsp = NULL;
620
621 if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
622 !vopstats_enabled)
623 return (NULL);
624 /*
625 * Set up the fstype. We go to so much trouble because all versions
626 * of NFS use the same fstype in their vfs even though they have
627 * distinct entries in the vfssw[] table.
628 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
629 */
630 if (vswp) {
631 fstype = vswp - vfssw; /* Gets us the index */
632 } else {
633 fstype = vfsp->vfs_fstype;
634 }
635
636 /*
637 * Point to the per-fstype vopstats. The only valid values are
638 * non-zero positive values less than the number of vfssw[] table
639 * entries.
640 */
641 if (fstype > 0 && fstype < nfstype) {
642 vsp = vopstats_fstype[fstype];
643 }
644
645 return (vsp);
646 }
647
648 /*
649 * Generate a kstat name, create the kstat structure, and allocate a
650 * vsk_anchor_t to hold it together. Return the pointer to the vsk_anchor_t
651 * to the caller. This must only be called from a mount.
652 */
653 vsk_anchor_t *
get_vskstat_anchor(vfs_t * vfsp)654 get_vskstat_anchor(vfs_t *vfsp)
655 {
656 char kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
657 statvfs64_t statvfsbuf; /* Needed to find f_fsid */
658 vsk_anchor_t *vskp = NULL; /* vfs <--> kstat anchor */
659 kstat_t *ksp; /* Ptr to new kstat */
660 avl_index_t where; /* Location in the AVL tree */
661
662 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
663 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
664 return (NULL);
665
666 /* Need to get the fsid to build a kstat name */
667 if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
668 /* Create a name for our kstats based on fsid */
669 (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
670 VOPSTATS_STR, statvfsbuf.f_fsid);
671
672 /* Allocate and initialize the vsk_anchor_t */
673 vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
674 bzero(vskp, sizeof (*vskp));
675 vskp->vsk_fsid = statvfsbuf.f_fsid;
676
677 mutex_enter(&vskstat_tree_lock);
678 if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
679 avl_insert(&vskstat_tree, vskp, where);
680 mutex_exit(&vskstat_tree_lock);
681
682 /*
683 * Now that we've got the anchor in the AVL
684 * tree, we can create the kstat.
685 */
686 ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
687 if (ksp) {
688 vskp->vsk_ksp = ksp;
689 }
690 } else {
691 /* Oops, found one! Release memory and lock. */
692 mutex_exit(&vskstat_tree_lock);
693 kmem_cache_free(vsk_anchor_cache, vskp);
694 vskp = NULL;
695 }
696 }
697 return (vskp);
698 }
699
700 /*
701 * We're in the process of tearing down the vfs and need to cleanup
702 * the data structures associated with the vopstats. Must only be called
703 * from dounmount().
704 */
705 void
teardown_vopstats(vfs_t * vfsp)706 teardown_vopstats(vfs_t *vfsp)
707 {
708 vsk_anchor_t *vskap;
709 avl_index_t where;
710
711 if (vfsp == NULL || vfsp->vfs_implp == NULL ||
712 (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
713 return;
714
715 /* This is a safe check since VFS_STATS must be set (see above) */
716 if ((vskap = vfsp->vfs_vskap) == NULL)
717 return;
718
719 /* Whack the pointer right away */
720 vfsp->vfs_vskap = NULL;
721
722 /* Lock the tree, remove the node, and delete the kstat */
723 mutex_enter(&vskstat_tree_lock);
724 if (avl_find(&vskstat_tree, vskap, &where)) {
725 avl_remove(&vskstat_tree, vskap);
726 }
727
728 if (vskap->vsk_ksp) {
729 kstat_delete(vskap->vsk_ksp);
730 }
731 mutex_exit(&vskstat_tree_lock);
732
733 kmem_cache_free(vsk_anchor_cache, vskap);
734 }
735
736 /*
737 * Read or write a vnode. Called from kernel code.
738 */
739 int
vn_rdwr(enum uio_rw rw,struct vnode * vp,caddr_t base,ssize_t len,offset_t offset,enum uio_seg seg,int ioflag,rlim64_t ulimit,cred_t * cr,ssize_t * residp)740 vn_rdwr(
741 enum uio_rw rw,
742 struct vnode *vp,
743 caddr_t base,
744 ssize_t len,
745 offset_t offset,
746 enum uio_seg seg,
747 int ioflag,
748 rlim64_t ulimit, /* meaningful only if rw is UIO_WRITE */
749 cred_t *cr,
750 ssize_t *residp)
751 {
752 struct uio uio;
753 struct iovec iov;
754 int error;
755 int in_crit = 0;
756
757 if (rw == UIO_WRITE && ISROFILE(vp))
758 return (EROFS);
759
760 if (len < 0)
761 return (EIO);
762
763 VOPXID_MAP_CR(vp, cr);
764
765 iov.iov_base = base;
766 iov.iov_len = len;
767 uio.uio_iov = &iov;
768 uio.uio_iovcnt = 1;
769 uio.uio_loffset = offset;
770 uio.uio_segflg = (short)seg;
771 uio.uio_resid = len;
772 uio.uio_llimit = ulimit;
773
774 /*
775 * We have to enter the critical region before calling VOP_RWLOCK
776 * to avoid a deadlock with ufs.
777 */
778 if (nbl_need_check(vp)) {
779 int svmand;
780
781 nbl_start_crit(vp, RW_READER);
782 in_crit = 1;
783 error = nbl_svmand(vp, cr, &svmand);
784 if (error != 0)
785 goto done;
786 if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
787 uio.uio_offset, uio.uio_resid, svmand, NULL)) {
788 error = EACCES;
789 goto done;
790 }
791 }
792
793 (void) VOP_RWLOCK(vp,
794 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
795 if (rw == UIO_WRITE) {
796 uio.uio_fmode = FWRITE;
797 uio.uio_extflg = UIO_COPY_DEFAULT;
798 error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
799 } else {
800 uio.uio_fmode = FREAD;
801 uio.uio_extflg = UIO_COPY_CACHED;
802 error = VOP_READ(vp, &uio, ioflag, cr, NULL);
803 }
804 VOP_RWUNLOCK(vp,
805 rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
806 if (residp)
807 *residp = uio.uio_resid;
808 else if (uio.uio_resid)
809 error = EIO;
810
811 done:
812 if (in_crit)
813 nbl_end_crit(vp);
814 return (error);
815 }
816
817 /*
818 * Release a vnode. Call VOP_INACTIVE on last reference or
819 * decrement reference count.
820 *
821 * To avoid race conditions, the v_count is left at 1 for
822 * the call to VOP_INACTIVE. This prevents another thread
823 * from reclaiming and releasing the vnode *before* the
824 * VOP_INACTIVE routine has a chance to destroy the vnode.
825 * We can't have more than 1 thread calling VOP_INACTIVE
826 * on a vnode.
827 */
828 void
vn_rele(vnode_t * vp)829 vn_rele(vnode_t *vp)
830 {
831 VERIFY(vp->v_count > 0);
832 mutex_enter(&vp->v_lock);
833 if (vp->v_count == 1) {
834 mutex_exit(&vp->v_lock);
835 VOP_INACTIVE(vp, CRED(), NULL);
836 return;
837 }
838 vp->v_count--;
839 mutex_exit(&vp->v_lock);
840 }
841
842 /*
843 * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
844 * as a single reference, so v_count is not decremented until the last DNLC hold
845 * is released. This makes it possible to distinguish vnodes that are referenced
846 * only by the DNLC.
847 */
848 void
vn_rele_dnlc(vnode_t * vp)849 vn_rele_dnlc(vnode_t *vp)
850 {
851 VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
852 mutex_enter(&vp->v_lock);
853 if (--vp->v_count_dnlc == 0) {
854 if (vp->v_count == 1) {
855 mutex_exit(&vp->v_lock);
856 VOP_INACTIVE(vp, CRED(), NULL);
857 return;
858 }
859 vp->v_count--;
860 }
861 mutex_exit(&vp->v_lock);
862 }
863
864 /*
865 * Like vn_rele() except that it clears v_stream under v_lock.
866 * This is used by sockfs when it dismantels the association between
867 * the sockfs node and the vnode in the underlaying file system.
868 * v_lock has to be held to prevent a thread coming through the lookupname
869 * path from accessing a stream head that is going away.
870 */
871 void
vn_rele_stream(vnode_t * vp)872 vn_rele_stream(vnode_t *vp)
873 {
874 VERIFY(vp->v_count > 0);
875 mutex_enter(&vp->v_lock);
876 vp->v_stream = NULL;
877 if (vp->v_count == 1) {
878 mutex_exit(&vp->v_lock);
879 VOP_INACTIVE(vp, CRED(), NULL);
880 return;
881 }
882 vp->v_count--;
883 mutex_exit(&vp->v_lock);
884 }
885
886 static void
vn_rele_inactive(vnode_t * vp)887 vn_rele_inactive(vnode_t *vp)
888 {
889 VOP_INACTIVE(vp, CRED(), NULL);
890 }
891
892 /*
893 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
894 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
895 * the file system as a result of releasing the vnode. Note, file systems
896 * already have to handle the race where the vnode is incremented before the
897 * inactive routine is called and does its locking.
898 *
899 * Warning: Excessive use of this routine can lead to performance problems.
900 * This is because taskqs throttle back allocation if too many are created.
901 */
902 void
vn_rele_async(vnode_t * vp,taskq_t * taskq)903 vn_rele_async(vnode_t *vp, taskq_t *taskq)
904 {
905 VERIFY(vp->v_count > 0);
906 mutex_enter(&vp->v_lock);
907 if (vp->v_count == 1) {
908 mutex_exit(&vp->v_lock);
909 VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
910 vp, TQ_SLEEP) != NULL);
911 return;
912 }
913 vp->v_count--;
914 mutex_exit(&vp->v_lock);
915 }
916
917 int
vn_open(char * pnamep,enum uio_seg seg,int filemode,int createmode,struct vnode ** vpp,enum create crwhy,mode_t umask)918 vn_open(
919 char *pnamep,
920 enum uio_seg seg,
921 int filemode,
922 int createmode,
923 struct vnode **vpp,
924 enum create crwhy,
925 mode_t umask)
926 {
927 return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
928 umask, NULL, -1));
929 }
930
931
932 /*
933 * Open/create a vnode.
934 * This may be callable by the kernel, the only known use
935 * of user context being that the current user credentials
936 * are used for permissions. crwhy is defined iff filemode & FCREAT.
937 */
938 int
vn_openat(char * pnamep,enum uio_seg seg,int filemode,int createmode,struct vnode ** vpp,enum create crwhy,mode_t umask,struct vnode * startvp,int fd)939 vn_openat(
940 char *pnamep,
941 enum uio_seg seg,
942 int filemode,
943 int createmode,
944 struct vnode **vpp,
945 enum create crwhy,
946 mode_t umask,
947 struct vnode *startvp,
948 int fd)
949 {
950 struct vnode *vp;
951 int mode;
952 int accessflags;
953 int error;
954 int in_crit = 0;
955 int open_done = 0;
956 int shrlock_done = 0;
957 struct vattr vattr;
958 enum symfollow follow;
959 int estale_retry = 0;
960 struct shrlock shr;
961 struct shr_locowner shr_own;
962
963 mode = 0;
964 accessflags = 0;
965 if (filemode & FREAD)
966 mode |= VREAD;
967 if (filemode & (FWRITE|FTRUNC))
968 mode |= VWRITE;
969 if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
970 mode |= VEXEC;
971
972 /* symlink interpretation */
973 if (filemode & FNOFOLLOW)
974 follow = NO_FOLLOW;
975 else
976 follow = FOLLOW;
977
978 if (filemode & FAPPEND)
979 accessflags |= V_APPEND;
980
981 top:
982 if (filemode & FCREAT) {
983 enum vcexcl excl;
984
985 /*
986 * Wish to create a file.
987 */
988 vattr.va_type = VREG;
989 vattr.va_mode = createmode;
990 vattr.va_mask = AT_TYPE|AT_MODE;
991 if (filemode & FTRUNC) {
992 vattr.va_size = 0;
993 vattr.va_mask |= AT_SIZE;
994 }
995 if (filemode & FEXCL)
996 excl = EXCL;
997 else
998 excl = NONEXCL;
999
1000 if (error =
1001 vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
1002 (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
1003 return (error);
1004 } else {
1005 /*
1006 * Wish to open a file. Just look it up.
1007 */
1008 if (error = lookupnameat(pnamep, seg, follow,
1009 NULLVPP, &vp, startvp)) {
1010 if ((error == ESTALE) &&
1011 fs_need_estale_retry(estale_retry++))
1012 goto top;
1013 return (error);
1014 }
1015
1016 /*
1017 * Get the attributes to check whether file is large.
1018 * We do this only if the FOFFMAX flag is not set and
1019 * only for regular files.
1020 */
1021
1022 if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
1023 vattr.va_mask = AT_SIZE;
1024 if ((error = VOP_GETATTR(vp, &vattr, 0,
1025 CRED(), NULL))) {
1026 goto out;
1027 }
1028 if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
1029 /*
1030 * Large File API - regular open fails
1031 * if FOFFMAX flag is set in file mode
1032 */
1033 error = EOVERFLOW;
1034 goto out;
1035 }
1036 }
1037 /*
1038 * Can't write directories, active texts, or
1039 * read-only filesystems. Can't truncate files
1040 * on which mandatory locking is in effect.
1041 */
1042 if (filemode & (FWRITE|FTRUNC)) {
1043 /*
1044 * Allow writable directory if VDIROPEN flag is set.
1045 */
1046 if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
1047 error = EISDIR;
1048 goto out;
1049 }
1050 if (ISROFILE(vp)) {
1051 error = EROFS;
1052 goto out;
1053 }
1054 /*
1055 * Can't truncate files on which
1056 * sysv mandatory locking is in effect.
1057 */
1058 if (filemode & FTRUNC) {
1059 vnode_t *rvp;
1060
1061 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1062 rvp = vp;
1063 if (rvp->v_filocks != NULL) {
1064 vattr.va_mask = AT_MODE;
1065 if ((error = VOP_GETATTR(vp,
1066 &vattr, 0, CRED(), NULL)) == 0 &&
1067 MANDLOCK(vp, vattr.va_mode))
1068 error = EAGAIN;
1069 }
1070 }
1071 if (error)
1072 goto out;
1073 }
1074 /*
1075 * Check permissions.
1076 */
1077 if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
1078 goto out;
1079 /*
1080 * Require FSEARCH to return a directory.
1081 * Require FEXEC to return a regular file.
1082 */
1083 if ((filemode & FSEARCH) && vp->v_type != VDIR) {
1084 error = ENOTDIR;
1085 goto out;
1086 }
1087 if ((filemode & FEXEC) && vp->v_type != VREG) {
1088 error = ENOEXEC; /* XXX: error code? */
1089 goto out;
1090 }
1091 }
1092
1093 /*
1094 * Do remaining checks for FNOFOLLOW and FNOLINKS.
1095 */
1096 if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
1097 error = ELOOP;
1098 goto out;
1099 }
1100 if (filemode & FNOLINKS) {
1101 vattr.va_mask = AT_NLINK;
1102 if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
1103 goto out;
1104 }
1105 if (vattr.va_nlink != 1) {
1106 error = EMLINK;
1107 goto out;
1108 }
1109 }
1110
1111 /*
1112 * Opening a socket corresponding to the AF_UNIX pathname
1113 * in the filesystem name space is not supported.
1114 * However, VSOCK nodes in namefs are supported in order
1115 * to make fattach work for sockets.
1116 *
1117 * XXX This uses VOP_REALVP to distinguish between
1118 * an unopened namefs node (where VOP_REALVP returns a
1119 * different VSOCK vnode) and a VSOCK created by vn_create
1120 * in some file system (where VOP_REALVP would never return
1121 * a different vnode).
1122 */
1123 if (vp->v_type == VSOCK) {
1124 struct vnode *nvp;
1125
1126 error = VOP_REALVP(vp, &nvp, NULL);
1127 if (error != 0 || nvp == NULL || nvp == vp ||
1128 nvp->v_type != VSOCK) {
1129 error = EOPNOTSUPP;
1130 goto out;
1131 }
1132 }
1133
1134 if ((vp->v_type == VREG) && nbl_need_check(vp)) {
1135 /* get share reservation */
1136 shr.s_access = 0;
1137 if (filemode & FWRITE)
1138 shr.s_access |= F_WRACC;
1139 if (filemode & FREAD)
1140 shr.s_access |= F_RDACC;
1141 shr.s_deny = 0;
1142 shr.s_sysid = 0;
1143 shr.s_pid = ttoproc(curthread)->p_pid;
1144 shr_own.sl_pid = shr.s_pid;
1145 shr_own.sl_id = fd;
1146 shr.s_own_len = sizeof (shr_own);
1147 shr.s_owner = (caddr_t)&shr_own;
1148 error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
1149 NULL);
1150 if (error)
1151 goto out;
1152 shrlock_done = 1;
1153
1154 /* nbmand conflict check if truncating file */
1155 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1156 nbl_start_crit(vp, RW_READER);
1157 in_crit = 1;
1158
1159 vattr.va_mask = AT_SIZE;
1160 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
1161 goto out;
1162 if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
1163 NULL)) {
1164 error = EACCES;
1165 goto out;
1166 }
1167 }
1168 }
1169
1170 /*
1171 * Do opening protocol.
1172 */
1173 error = VOP_OPEN(&vp, filemode, CRED(), NULL);
1174 if (error)
1175 goto out;
1176 open_done = 1;
1177
1178 /*
1179 * Truncate if required.
1180 */
1181 if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1182 vattr.va_size = 0;
1183 vattr.va_mask = AT_SIZE;
1184 if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
1185 goto out;
1186 }
1187 out:
1188 ASSERT(vp->v_count > 0);
1189
1190 if (in_crit) {
1191 nbl_end_crit(vp);
1192 in_crit = 0;
1193 }
1194 if (error) {
1195 if (open_done) {
1196 (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
1197 NULL);
1198 open_done = 0;
1199 shrlock_done = 0;
1200 }
1201 if (shrlock_done) {
1202 (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
1203 NULL);
1204 shrlock_done = 0;
1205 }
1206
1207 /*
1208 * The following clause was added to handle a problem
1209 * with NFS consistency. It is possible that a lookup
1210 * of the file to be opened succeeded, but the file
1211 * itself doesn't actually exist on the server. This
1212 * is chiefly due to the DNLC containing an entry for
1213 * the file which has been removed on the server. In
1214 * this case, we just start over. If there was some
1215 * other cause for the ESTALE error, then the lookup
1216 * of the file will fail and the error will be returned
1217 * above instead of looping around from here.
1218 */
1219 VN_RELE(vp);
1220 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1221 goto top;
1222 } else
1223 *vpp = vp;
1224 return (error);
1225 }
1226
1227 /*
1228 * The following two accessor functions are for the NFSv4 server. Since there
1229 * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1230 * vnode open counts correct when a client "upgrades" an open or does an
1231 * open_downgrade. In NFS, an upgrade or downgrade can not only change the
1232 * open mode (add or subtract read or write), but also change the share/deny
1233 * modes. However, share reservations are not integrated with OPEN, yet, so
1234 * we need to handle each separately. These functions are cleaner than having
1235 * the NFS server manipulate the counts directly, however, nobody else should
1236 * use these functions.
1237 */
1238 void
vn_open_upgrade(vnode_t * vp,int filemode)1239 vn_open_upgrade(
1240 vnode_t *vp,
1241 int filemode)
1242 {
1243 ASSERT(vp->v_type == VREG);
1244
1245 if (filemode & FREAD)
1246 atomic_add_32(&(vp->v_rdcnt), 1);
1247 if (filemode & FWRITE)
1248 atomic_add_32(&(vp->v_wrcnt), 1);
1249
1250 }
1251
1252 void
vn_open_downgrade(vnode_t * vp,int filemode)1253 vn_open_downgrade(
1254 vnode_t *vp,
1255 int filemode)
1256 {
1257 ASSERT(vp->v_type == VREG);
1258
1259 if (filemode & FREAD) {
1260 ASSERT(vp->v_rdcnt > 0);
1261 atomic_add_32(&(vp->v_rdcnt), -1);
1262 }
1263 if (filemode & FWRITE) {
1264 ASSERT(vp->v_wrcnt > 0);
1265 atomic_add_32(&(vp->v_wrcnt), -1);
1266 }
1267
1268 }
1269
1270 int
vn_create(char * pnamep,enum uio_seg seg,struct vattr * vap,enum vcexcl excl,int mode,struct vnode ** vpp,enum create why,int flag,mode_t umask)1271 vn_create(
1272 char *pnamep,
1273 enum uio_seg seg,
1274 struct vattr *vap,
1275 enum vcexcl excl,
1276 int mode,
1277 struct vnode **vpp,
1278 enum create why,
1279 int flag,
1280 mode_t umask)
1281 {
1282 return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
1283 umask, NULL));
1284 }
1285
1286 /*
1287 * Create a vnode (makenode).
1288 */
1289 int
vn_createat(char * pnamep,enum uio_seg seg,struct vattr * vap,enum vcexcl excl,int mode,struct vnode ** vpp,enum create why,int flag,mode_t umask,struct vnode * startvp)1290 vn_createat(
1291 char *pnamep,
1292 enum uio_seg seg,
1293 struct vattr *vap,
1294 enum vcexcl excl,
1295 int mode,
1296 struct vnode **vpp,
1297 enum create why,
1298 int flag,
1299 mode_t umask,
1300 struct vnode *startvp)
1301 {
1302 struct vnode *dvp; /* ptr to parent dir vnode */
1303 struct vnode *vp = NULL;
1304 struct pathname pn;
1305 int error;
1306 int in_crit = 0;
1307 struct vattr vattr;
1308 enum symfollow follow;
1309 int estale_retry = 0;
1310 uint32_t auditing = AU_AUDITING();
1311
1312 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1313
1314 /* symlink interpretation */
1315 if ((flag & FNOFOLLOW) || excl == EXCL)
1316 follow = NO_FOLLOW;
1317 else
1318 follow = FOLLOW;
1319 flag &= ~(FNOFOLLOW|FNOLINKS);
1320
1321 top:
1322 /*
1323 * Lookup directory.
1324 * If new object is a file, call lower level to create it.
1325 * Note that it is up to the lower level to enforce exclusive
1326 * creation, if the file is already there.
1327 * This allows the lower level to do whatever
1328 * locking or protocol that is needed to prevent races.
1329 * If the new object is directory call lower level to make
1330 * the new directory, with "." and "..".
1331 */
1332 if (error = pn_get(pnamep, seg, &pn))
1333 return (error);
1334 if (auditing)
1335 audit_vncreate_start();
1336 dvp = NULL;
1337 *vpp = NULL;
1338 /*
1339 * lookup will find the parent directory for the vnode.
1340 * When it is done the pn holds the name of the entry
1341 * in the directory.
1342 * If this is a non-exclusive create we also find the node itself.
1343 */
1344 error = lookuppnat(&pn, NULL, follow, &dvp,
1345 (excl == EXCL) ? NULLVPP : vpp, startvp);
1346 if (error) {
1347 pn_free(&pn);
1348 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1349 goto top;
1350 if (why == CRMKDIR && error == EINVAL)
1351 error = EEXIST; /* SVID */
1352 return (error);
1353 }
1354
1355 if (why != CRMKNOD)
1356 vap->va_mode &= ~VSVTX;
1357
1358 /*
1359 * If default ACLs are defined for the directory don't apply the
1360 * umask if umask is passed.
1361 */
1362
1363 if (umask) {
1364
1365 vsecattr_t vsec;
1366
1367 vsec.vsa_aclcnt = 0;
1368 vsec.vsa_aclentp = NULL;
1369 vsec.vsa_dfaclcnt = 0;
1370 vsec.vsa_dfaclentp = NULL;
1371 vsec.vsa_mask = VSA_DFACLCNT;
1372 error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
1373 /*
1374 * If error is ENOSYS then treat it as no error
1375 * Don't want to force all file systems to support
1376 * aclent_t style of ACL's.
1377 */
1378 if (error == ENOSYS)
1379 error = 0;
1380 if (error) {
1381 if (*vpp != NULL)
1382 VN_RELE(*vpp);
1383 goto out;
1384 } else {
1385 /*
1386 * Apply the umask if no default ACLs.
1387 */
1388 if (vsec.vsa_dfaclcnt == 0)
1389 vap->va_mode &= ~umask;
1390
1391 /*
1392 * VOP_GETSECATTR() may have allocated memory for
1393 * ACLs we didn't request, so double-check and
1394 * free it if necessary.
1395 */
1396 if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
1397 kmem_free((caddr_t)vsec.vsa_aclentp,
1398 vsec.vsa_aclcnt * sizeof (aclent_t));
1399 if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
1400 kmem_free((caddr_t)vsec.vsa_dfaclentp,
1401 vsec.vsa_dfaclcnt * sizeof (aclent_t));
1402 }
1403 }
1404
1405 /*
1406 * In general we want to generate EROFS if the file system is
1407 * readonly. However, POSIX (IEEE Std. 1003.1) section 5.3.1
1408 * documents the open system call, and it says that O_CREAT has no
1409 * effect if the file already exists. Bug 1119649 states
1410 * that open(path, O_CREAT, ...) fails when attempting to open an
1411 * existing file on a read only file system. Thus, the first part
1412 * of the following if statement has 3 checks:
1413 * if the file exists &&
1414 * it is being open with write access &&
1415 * the file system is read only
1416 * then generate EROFS
1417 */
1418 if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
1419 (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
1420 if (*vpp)
1421 VN_RELE(*vpp);
1422 error = EROFS;
1423 } else if (excl == NONEXCL && *vpp != NULL) {
1424 vnode_t *rvp;
1425
1426 /*
1427 * File already exists. If a mandatory lock has been
1428 * applied, return error.
1429 */
1430 vp = *vpp;
1431 if (VOP_REALVP(vp, &rvp, NULL) != 0)
1432 rvp = vp;
1433 if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
1434 nbl_start_crit(vp, RW_READER);
1435 in_crit = 1;
1436 }
1437 if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
1438 vattr.va_mask = AT_MODE|AT_SIZE;
1439 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
1440 goto out;
1441 }
1442 if (MANDLOCK(vp, vattr.va_mode)) {
1443 error = EAGAIN;
1444 goto out;
1445 }
1446 /*
1447 * File cannot be truncated if non-blocking mandatory
1448 * locks are currently on the file.
1449 */
1450 if ((vap->va_mask & AT_SIZE) && in_crit) {
1451 u_offset_t offset;
1452 ssize_t length;
1453
1454 offset = vap->va_size > vattr.va_size ?
1455 vattr.va_size : vap->va_size;
1456 length = vap->va_size > vattr.va_size ?
1457 vap->va_size - vattr.va_size :
1458 vattr.va_size - vap->va_size;
1459 if (nbl_conflict(vp, NBL_WRITE, offset,
1460 length, 0, NULL)) {
1461 error = EACCES;
1462 goto out;
1463 }
1464 }
1465 }
1466
1467 /*
1468 * If the file is the root of a VFS, we've crossed a
1469 * mount point and the "containing" directory that we
1470 * acquired above (dvp) is irrelevant because it's in
1471 * a different file system. We apply VOP_CREATE to the
1472 * target itself instead of to the containing directory
1473 * and supply a null path name to indicate (conventionally)
1474 * the node itself as the "component" of interest.
1475 *
1476 * The intercession of the file system is necessary to
1477 * ensure that the appropriate permission checks are
1478 * done.
1479 */
1480 if (vp->v_flag & VROOT) {
1481 ASSERT(why != CRMKDIR);
1482 error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
1483 CRED(), flag, NULL, NULL);
1484 /*
1485 * If the create succeeded, it will have created
1486 * a new reference to the vnode. Give up the
1487 * original reference. The assertion should not
1488 * get triggered because NBMAND locks only apply to
1489 * VREG files. And if in_crit is non-zero for some
1490 * reason, detect that here, rather than when we
1491 * deference a null vp.
1492 */
1493 ASSERT(in_crit == 0);
1494 VN_RELE(vp);
1495 vp = NULL;
1496 goto out;
1497 }
1498
1499 /*
1500 * Large File API - non-large open (FOFFMAX flag not set)
1501 * of regular file fails if the file size exceeds MAXOFF32_T.
1502 */
1503 if (why != CRMKDIR &&
1504 !(flag & FOFFMAX) &&
1505 (vp->v_type == VREG)) {
1506 vattr.va_mask = AT_SIZE;
1507 if ((error = VOP_GETATTR(vp, &vattr, 0,
1508 CRED(), NULL))) {
1509 goto out;
1510 }
1511 if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
1512 error = EOVERFLOW;
1513 goto out;
1514 }
1515 }
1516 }
1517
1518 if (error == 0) {
1519 /*
1520 * Call mkdir() if specified, otherwise create().
1521 */
1522 int must_be_dir = pn_fixslash(&pn); /* trailing '/'? */
1523
1524 if (why == CRMKDIR)
1525 /*
1526 * N.B., if vn_createat() ever requests
1527 * case-insensitive behavior then it will need
1528 * to be passed to VOP_MKDIR(). VOP_CREATE()
1529 * will already get it via "flag"
1530 */
1531 error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
1532 NULL, 0, NULL);
1533 else if (!must_be_dir)
1534 error = VOP_CREATE(dvp, pn.pn_path, vap,
1535 excl, mode, vpp, CRED(), flag, NULL, NULL);
1536 else
1537 error = ENOTDIR;
1538 }
1539
1540 out:
1541
1542 if (auditing)
1543 audit_vncreate_finish(*vpp, error);
1544 if (in_crit) {
1545 nbl_end_crit(vp);
1546 in_crit = 0;
1547 }
1548 if (vp != NULL) {
1549 VN_RELE(vp);
1550 vp = NULL;
1551 }
1552 pn_free(&pn);
1553 VN_RELE(dvp);
1554 /*
1555 * The following clause was added to handle a problem
1556 * with NFS consistency. It is possible that a lookup
1557 * of the file to be created succeeded, but the file
1558 * itself doesn't actually exist on the server. This
1559 * is chiefly due to the DNLC containing an entry for
1560 * the file which has been removed on the server. In
1561 * this case, we just start over. If there was some
1562 * other cause for the ESTALE error, then the lookup
1563 * of the file will fail and the error will be returned
1564 * above instead of looping around from here.
1565 */
1566 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1567 goto top;
1568 return (error);
1569 }
1570
1571 int
vn_link(char * from,char * to,enum uio_seg seg)1572 vn_link(char *from, char *to, enum uio_seg seg)
1573 {
1574 return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
1575 }
1576
1577 int
vn_linkat(vnode_t * fstartvp,char * from,enum symfollow follow,vnode_t * tstartvp,char * to,enum uio_seg seg)1578 vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1579 vnode_t *tstartvp, char *to, enum uio_seg seg)
1580 {
1581 struct vnode *fvp; /* from vnode ptr */
1582 struct vnode *tdvp; /* to directory vnode ptr */
1583 struct pathname pn;
1584 int error;
1585 struct vattr vattr;
1586 dev_t fsid;
1587 int estale_retry = 0;
1588 uint32_t auditing = AU_AUDITING();
1589
1590 top:
1591 fvp = tdvp = NULL;
1592 if (error = pn_get(to, seg, &pn))
1593 return (error);
1594 if (auditing && fstartvp != NULL)
1595 audit_setfsat_path(1);
1596 if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1597 goto out;
1598 if (auditing && tstartvp != NULL)
1599 audit_setfsat_path(3);
1600 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1601 goto out;
1602 /*
1603 * Make sure both source vnode and target directory vnode are
1604 * in the same vfs and that it is writeable.
1605 */
1606 vattr.va_mask = AT_FSID;
1607 if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1608 goto out;
1609 fsid = vattr.va_fsid;
1610 vattr.va_mask = AT_FSID;
1611 if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1612 goto out;
1613 if (fsid != vattr.va_fsid) {
1614 error = EXDEV;
1615 goto out;
1616 }
1617 if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1618 error = EROFS;
1619 goto out;
1620 }
1621 /*
1622 * Do the link.
1623 */
1624 (void) pn_fixslash(&pn);
1625 error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1626 out:
1627 pn_free(&pn);
1628 if (fvp)
1629 VN_RELE(fvp);
1630 if (tdvp)
1631 VN_RELE(tdvp);
1632 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1633 goto top;
1634 return (error);
1635 }
1636
1637 int
vn_rename(char * from,char * to,enum uio_seg seg)1638 vn_rename(char *from, char *to, enum uio_seg seg)
1639 {
1640 return (vn_renameat(NULL, from, NULL, to, seg));
1641 }
1642
1643 int
vn_renameat(vnode_t * fdvp,char * fname,vnode_t * tdvp,char * tname,enum uio_seg seg)1644 vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
1645 char *tname, enum uio_seg seg)
1646 {
1647 int error;
1648 struct vattr vattr;
1649 struct pathname fpn; /* from pathname */
1650 struct pathname tpn; /* to pathname */
1651 dev_t fsid;
1652 int in_crit_src, in_crit_targ;
1653 vnode_t *fromvp, *fvp;
1654 vnode_t *tovp, *targvp;
1655 int estale_retry = 0;
1656 uint32_t auditing = AU_AUDITING();
1657
1658 top:
1659 fvp = fromvp = tovp = targvp = NULL;
1660 in_crit_src = in_crit_targ = 0;
1661 /*
1662 * Get to and from pathnames.
1663 */
1664 if (error = pn_get(fname, seg, &fpn))
1665 return (error);
1666 if (error = pn_get(tname, seg, &tpn)) {
1667 pn_free(&fpn);
1668 return (error);
1669 }
1670
1671 /*
1672 * First we need to resolve the correct directories
1673 * The passed in directories may only be a starting point,
1674 * but we need the real directories the file(s) live in.
1675 * For example the fname may be something like usr/lib/sparc
1676 * and we were passed in the / directory, but we need to
1677 * use the lib directory for the rename.
1678 */
1679
1680 if (auditing && fdvp != NULL)
1681 audit_setfsat_path(1);
1682 /*
1683 * Lookup to and from directories.
1684 */
1685 if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1686 goto out;
1687 }
1688
1689 /*
1690 * Make sure there is an entry.
1691 */
1692 if (fvp == NULL) {
1693 error = ENOENT;
1694 goto out;
1695 }
1696
1697 if (auditing && tdvp != NULL)
1698 audit_setfsat_path(3);
1699 if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
1700 goto out;
1701 }
1702
1703 /*
1704 * Make sure both the from vnode directory and the to directory
1705 * are in the same vfs and the to directory is writable.
1706 * We check fsid's, not vfs pointers, so loopback fs works.
1707 */
1708 if (fromvp != tovp) {
1709 vattr.va_mask = AT_FSID;
1710 if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
1711 goto out;
1712 fsid = vattr.va_fsid;
1713 vattr.va_mask = AT_FSID;
1714 if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
1715 goto out;
1716 if (fsid != vattr.va_fsid) {
1717 error = EXDEV;
1718 goto out;
1719 }
1720 }
1721
1722 if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1723 error = EROFS;
1724 goto out;
1725 }
1726
1727 if (targvp && (fvp != targvp)) {
1728 nbl_start_crit(targvp, RW_READER);
1729 in_crit_targ = 1;
1730 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1731 error = EACCES;
1732 goto out;
1733 }
1734 }
1735
1736 if (nbl_need_check(fvp)) {
1737 nbl_start_crit(fvp, RW_READER);
1738 in_crit_src = 1;
1739 if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
1740 error = EACCES;
1741 goto out;
1742 }
1743 }
1744
1745 /*
1746 * Do the rename.
1747 */
1748 (void) pn_fixslash(&tpn);
1749 error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
1750 NULL, 0);
1751
1752 out:
1753 pn_free(&fpn);
1754 pn_free(&tpn);
1755 if (in_crit_src)
1756 nbl_end_crit(fvp);
1757 if (in_crit_targ)
1758 nbl_end_crit(targvp);
1759 if (fromvp)
1760 VN_RELE(fromvp);
1761 if (tovp)
1762 VN_RELE(tovp);
1763 if (targvp)
1764 VN_RELE(targvp);
1765 if (fvp)
1766 VN_RELE(fvp);
1767 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1768 goto top;
1769 return (error);
1770 }
1771
1772 /*
1773 * Remove a file or directory.
1774 */
1775 int
vn_remove(char * fnamep,enum uio_seg seg,enum rm dirflag)1776 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1777 {
1778 return (vn_removeat(NULL, fnamep, seg, dirflag));
1779 }
1780
1781 int
vn_removeat(vnode_t * startvp,char * fnamep,enum uio_seg seg,enum rm dirflag)1782 vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1783 {
1784 struct vnode *vp; /* entry vnode */
1785 struct vnode *dvp; /* ptr to parent dir vnode */
1786 struct vnode *coveredvp;
1787 struct pathname pn; /* name of entry */
1788 enum vtype vtype;
1789 int error;
1790 struct vfs *vfsp;
1791 struct vfs *dvfsp; /* ptr to parent dir vfs */
1792 int in_crit = 0;
1793 int estale_retry = 0;
1794
1795 top:
1796 if (error = pn_get(fnamep, seg, &pn))
1797 return (error);
1798 dvp = vp = NULL;
1799 if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1800 pn_free(&pn);
1801 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1802 goto top;
1803 return (error);
1804 }
1805
1806 /*
1807 * Make sure there is an entry.
1808 */
1809 if (vp == NULL) {
1810 error = ENOENT;
1811 goto out;
1812 }
1813
1814 vfsp = vp->v_vfsp;
1815 dvfsp = dvp->v_vfsp;
1816
1817 /*
1818 * If the named file is the root of a mounted filesystem, fail,
1819 * unless it's marked unlinkable. In that case, unmount the
1820 * filesystem and proceed to unlink the covered vnode. (If the
1821 * covered vnode is a directory, use rmdir instead of unlink,
1822 * to avoid file system corruption.)
1823 */
1824 if (vp->v_flag & VROOT) {
1825 if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
1826 error = EBUSY;
1827 goto out;
1828 }
1829
1830 /*
1831 * Namefs specific code starts here.
1832 */
1833
1834 if (dirflag == RMDIRECTORY) {
1835 /*
1836 * User called rmdir(2) on a file that has
1837 * been namefs mounted on top of. Since
1838 * namefs doesn't allow directories to
1839 * be mounted on other files we know
1840 * vp is not of type VDIR so fail to operation.
1841 */
1842 error = ENOTDIR;
1843 goto out;
1844 }
1845
1846 /*
1847 * If VROOT is still set after grabbing vp->v_lock,
1848 * noone has finished nm_unmount so far and coveredvp
1849 * is valid.
1850 * If we manage to grab vn_vfswlock(coveredvp) before releasing
1851 * vp->v_lock, any race window is eliminated.
1852 */
1853
1854 mutex_enter(&vp->v_lock);
1855 if ((vp->v_flag & VROOT) == 0) {
1856 /* Someone beat us to the unmount */
1857 mutex_exit(&vp->v_lock);
1858 error = EBUSY;
1859 goto out;
1860 }
1861 vfsp = vp->v_vfsp;
1862 coveredvp = vfsp->vfs_vnodecovered;
1863 ASSERT(coveredvp);
1864 /*
1865 * Note: Implementation of vn_vfswlock shows that ordering of
1866 * v_lock / vn_vfswlock is not an issue here.
1867 */
1868 error = vn_vfswlock(coveredvp);
1869 mutex_exit(&vp->v_lock);
1870
1871 if (error)
1872 goto out;
1873
1874 VN_HOLD(coveredvp);
1875 VN_RELE(vp);
1876 error = dounmount(vfsp, 0, CRED());
1877
1878 /*
1879 * Unmounted the namefs file system; now get
1880 * the object it was mounted over.
1881 */
1882 vp = coveredvp;
1883 /*
1884 * If namefs was mounted over a directory, then
1885 * we want to use rmdir() instead of unlink().
1886 */
1887 if (vp->v_type == VDIR)
1888 dirflag = RMDIRECTORY;
1889
1890 if (error)
1891 goto out;
1892 }
1893
1894 /*
1895 * Make sure filesystem is writeable.
1896 * We check the parent directory's vfs in case this is an lofs vnode.
1897 */
1898 if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1899 error = EROFS;
1900 goto out;
1901 }
1902
1903 vtype = vp->v_type;
1904
1905 /*
1906 * If there is the possibility of an nbmand share reservation, make
1907 * sure it's okay to remove the file. Keep a reference to the
1908 * vnode, so that we can exit the nbl critical region after
1909 * calling VOP_REMOVE.
1910 * If there is no possibility of an nbmand share reservation,
1911 * release the vnode reference now. Filesystems like NFS may
1912 * behave differently if there is an extra reference, so get rid of
1913 * this one. Fortunately, we can't have nbmand mounts on NFS
1914 * filesystems.
1915 */
1916 if (nbl_need_check(vp)) {
1917 nbl_start_crit(vp, RW_READER);
1918 in_crit = 1;
1919 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
1920 error = EACCES;
1921 goto out;
1922 }
1923 } else {
1924 VN_RELE(vp);
1925 vp = NULL;
1926 }
1927
1928 if (dirflag == RMDIRECTORY) {
1929 /*
1930 * Caller is using rmdir(2), which can only be applied to
1931 * directories.
1932 */
1933 if (vtype != VDIR) {
1934 error = ENOTDIR;
1935 } else {
1936 vnode_t *cwd;
1937 proc_t *pp = curproc;
1938
1939 mutex_enter(&pp->p_lock);
1940 cwd = PTOU(pp)->u_cdir;
1941 VN_HOLD(cwd);
1942 mutex_exit(&pp->p_lock);
1943 error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
1944 NULL, 0);
1945 VN_RELE(cwd);
1946 }
1947 } else {
1948 /*
1949 * Unlink(2) can be applied to anything.
1950 */
1951 error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
1952 }
1953
1954 out:
1955 pn_free(&pn);
1956 if (in_crit) {
1957 nbl_end_crit(vp);
1958 in_crit = 0;
1959 }
1960 if (vp != NULL)
1961 VN_RELE(vp);
1962 if (dvp != NULL)
1963 VN_RELE(dvp);
1964 if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1965 goto top;
1966 return (error);
1967 }
1968
1969 /*
1970 * Utility function to compare equality of vnodes.
1971 * Compare the underlying real vnodes, if there are underlying vnodes.
1972 * This is a more thorough comparison than the VN_CMP() macro provides.
1973 */
1974 int
vn_compare(vnode_t * vp1,vnode_t * vp2)1975 vn_compare(vnode_t *vp1, vnode_t *vp2)
1976 {
1977 vnode_t *realvp;
1978
1979 if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
1980 vp1 = realvp;
1981 if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
1982 vp2 = realvp;
1983 return (VN_CMP(vp1, vp2));
1984 }
1985
1986 /*
1987 * The number of locks to hash into. This value must be a power
1988 * of 2 minus 1 and should probably also be prime.
1989 */
1990 #define NUM_BUCKETS 1023
1991
1992 struct vn_vfslocks_bucket {
1993 kmutex_t vb_lock;
1994 vn_vfslocks_entry_t *vb_list;
1995 char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
1996 };
1997
1998 /*
1999 * Total number of buckets will be NUM_BUCKETS + 1 .
2000 */
2001
2002 #pragma align 64(vn_vfslocks_buckets)
2003 static struct vn_vfslocks_bucket vn_vfslocks_buckets[NUM_BUCKETS + 1];
2004
/*
 * Number of low-order pointer bits discarded before a vfs/vnode pointer
 * is masked into a bucket index.
 */
#define	VN_VFSLOCKS_SHIFT	9

/*
 * Map a vfs/vnode pointer to an index into vn_vfslocks_buckets[].
 *
 * The pointer is converted to an unsigned integer before it is shifted:
 * right-shifting a negative signed value is implementation-defined in C,
 * and addresses with the top bit set would be negative as intptr_t.
 * The result is always in the range 0 .. NUM_BUCKETS.
 */
#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((uintptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2009
/*
 * vn_vfslocks_getlock() uses a hash scheme to find, or create, the
 * rwstlock entry for the vfs/vnode pointer passed to it.
 *
 * vn_vfslocks_rele() releases a reference in the
 * hash table, which allows the entry allocated by
 * vn_vfslocks_getlock() to be freed at a later
 * stage when the refcount drops to zero.
 */
2019
2020 vn_vfslocks_entry_t *
vn_vfslocks_getlock(void * vfsvpptr)2021 vn_vfslocks_getlock(void *vfsvpptr)
2022 {
2023 struct vn_vfslocks_bucket *bp;
2024 vn_vfslocks_entry_t *vep;
2025 vn_vfslocks_entry_t *tvep;
2026
2027 ASSERT(vfsvpptr != NULL);
2028 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2029
2030 mutex_enter(&bp->vb_lock);
2031 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2032 if (vep->ve_vpvfs == vfsvpptr) {
2033 vep->ve_refcnt++;
2034 mutex_exit(&bp->vb_lock);
2035 return (vep);
2036 }
2037 }
2038 mutex_exit(&bp->vb_lock);
2039 vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2040 rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2041 vep->ve_vpvfs = (char *)vfsvpptr;
2042 vep->ve_refcnt = 1;
2043 mutex_enter(&bp->vb_lock);
2044 for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2045 if (tvep->ve_vpvfs == vfsvpptr) {
2046 tvep->ve_refcnt++;
2047 mutex_exit(&bp->vb_lock);
2048
2049 /*
2050 * There is already an entry in the hash
2051 * destroy what we just allocated.
2052 */
2053 rwst_destroy(&vep->ve_lock);
2054 kmem_free(vep, sizeof (*vep));
2055 return (tvep);
2056 }
2057 }
2058 vep->ve_next = bp->vb_list;
2059 bp->vb_list = vep;
2060 mutex_exit(&bp->vb_lock);
2061 return (vep);
2062 }
2063
2064 void
vn_vfslocks_rele(vn_vfslocks_entry_t * vepent)2065 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2066 {
2067 struct vn_vfslocks_bucket *bp;
2068 vn_vfslocks_entry_t *vep;
2069 vn_vfslocks_entry_t *pvep;
2070
2071 ASSERT(vepent != NULL);
2072 ASSERT(vepent->ve_vpvfs != NULL);
2073
2074 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2075
2076 mutex_enter(&bp->vb_lock);
2077 vepent->ve_refcnt--;
2078
2079 if ((int32_t)vepent->ve_refcnt < 0)
2080 cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2081
2082 if (vepent->ve_refcnt == 0) {
2083 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2084 if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2085 if (bp->vb_list == vep)
2086 bp->vb_list = vep->ve_next;
2087 else {
2088 /* LINTED */
2089 pvep->ve_next = vep->ve_next;
2090 }
2091 mutex_exit(&bp->vb_lock);
2092 rwst_destroy(&vep->ve_lock);
2093 kmem_free(vep, sizeof (*vep));
2094 return;
2095 }
2096 pvep = vep;
2097 }
2098 cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2099 }
2100 mutex_exit(&bp->vb_lock);
2101 }
2102
2103 /*
2104 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2105 * lock protecting the v_vfsmountedhere field.
2106 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2107 * except that it blocks to acquire the lock VVFSLOCK.
2108 *
2109 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2110 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2111 * need the non-blocking version of the writers lock i.e. vn_vfswlock
2112 */
2113 int
vn_vfswlock_wait(vnode_t * vp)2114 vn_vfswlock_wait(vnode_t *vp)
2115 {
2116 int retval;
2117 vn_vfslocks_entry_t *vpvfsentry;
2118 ASSERT(vp != NULL);
2119
2120 vpvfsentry = vn_vfslocks_getlock(vp);
2121 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2122
2123 if (retval == EINTR) {
2124 vn_vfslocks_rele(vpvfsentry);
2125 return (EINTR);
2126 }
2127 return (retval);
2128 }
2129
2130 int
vn_vfsrlock_wait(vnode_t * vp)2131 vn_vfsrlock_wait(vnode_t *vp)
2132 {
2133 int retval;
2134 vn_vfslocks_entry_t *vpvfsentry;
2135 ASSERT(vp != NULL);
2136
2137 vpvfsentry = vn_vfslocks_getlock(vp);
2138 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2139
2140 if (retval == EINTR) {
2141 vn_vfslocks_rele(vpvfsentry);
2142 return (EINTR);
2143 }
2144
2145 return (retval);
2146 }
2147
2148
2149 /*
2150 * vn_vfswlock is used to implement a lock which is logically a writers lock
2151 * protecting the v_vfsmountedhere field.
2152 */
2153 int
vn_vfswlock(vnode_t * vp)2154 vn_vfswlock(vnode_t *vp)
2155 {
2156 vn_vfslocks_entry_t *vpvfsentry;
2157
2158 /*
2159 * If vp is NULL then somebody is trying to lock the covered vnode
2160 * of /. (vfs_vnodecovered is NULL for /). This situation will
2161 * only happen when unmounting /. Since that operation will fail
2162 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2163 */
2164 if (vp == NULL)
2165 return (EBUSY);
2166
2167 vpvfsentry = vn_vfslocks_getlock(vp);
2168
2169 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2170 return (0);
2171
2172 vn_vfslocks_rele(vpvfsentry);
2173 return (EBUSY);
2174 }
2175
2176 int
vn_vfsrlock(vnode_t * vp)2177 vn_vfsrlock(vnode_t *vp)
2178 {
2179 vn_vfslocks_entry_t *vpvfsentry;
2180
2181 /*
2182 * If vp is NULL then somebody is trying to lock the covered vnode
2183 * of /. (vfs_vnodecovered is NULL for /). This situation will
2184 * only happen when unmounting /. Since that operation will fail
2185 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2186 */
2187 if (vp == NULL)
2188 return (EBUSY);
2189
2190 vpvfsentry = vn_vfslocks_getlock(vp);
2191
2192 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2193 return (0);
2194
2195 vn_vfslocks_rele(vpvfsentry);
2196 return (EBUSY);
2197 }
2198
2199 void
vn_vfsunlock(vnode_t * vp)2200 vn_vfsunlock(vnode_t *vp)
2201 {
2202 vn_vfslocks_entry_t *vpvfsentry;
2203
2204 /*
2205 * ve_refcnt needs to be decremented twice.
2206 * 1. To release refernce after a call to vn_vfslocks_getlock()
2207 * 2. To release the reference from the locking routines like
2208 * vn_vfsrlock/vn_vfswlock etc,.
2209 */
2210 vpvfsentry = vn_vfslocks_getlock(vp);
2211 vn_vfslocks_rele(vpvfsentry);
2212
2213 rwst_exit(&vpvfsentry->ve_lock);
2214 vn_vfslocks_rele(vpvfsentry);
2215 }
2216
2217 int
vn_vfswlock_held(vnode_t * vp)2218 vn_vfswlock_held(vnode_t *vp)
2219 {
2220 int held;
2221 vn_vfslocks_entry_t *vpvfsentry;
2222
2223 ASSERT(vp != NULL);
2224
2225 vpvfsentry = vn_vfslocks_getlock(vp);
2226 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2227
2228 vn_vfslocks_rele(vpvfsentry);
2229 return (held);
2230 }
2231
2232
2233 int
vn_make_ops(const char * name,const fs_operation_def_t * templ,vnodeops_t ** actual)2234 vn_make_ops(
2235 const char *name, /* Name of file system */
2236 const fs_operation_def_t *templ, /* Operation specification */
2237 vnodeops_t **actual) /* Return the vnodeops */
2238 {
2239 int unused_ops;
2240 int error;
2241
2242 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2243
2244 (*actual)->vnop_name = name;
2245
2246 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2247 if (error) {
2248 kmem_free(*actual, sizeof (vnodeops_t));
2249 }
2250
2251 #if DEBUG
2252 if (unused_ops != 0)
2253 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2254 "but not used", name, unused_ops);
2255 #endif
2256
2257 return (error);
2258 }
2259
2260 /*
2261 * Free the vnodeops created as a result of vn_make_ops()
2262 */
2263 void
vn_freevnodeops(vnodeops_t * vnops)2264 vn_freevnodeops(vnodeops_t *vnops)
2265 {
2266 kmem_free(vnops, sizeof (vnodeops_t));
2267 }
2268
2269 /*
2270 * Vnode cache.
2271 */
2272
2273 /* ARGSUSED */
2274 static int
vn_cache_constructor(void * buf,void * cdrarg,int kmflags)2275 vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2276 {
2277 struct vnode *vp;
2278
2279 vp = buf;
2280
2281 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2282 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2283 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2284 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2285 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2286 vp->v_path = NULL;
2287 vp->v_mpssdata = NULL;
2288 vp->v_vsd = NULL;
2289 vp->v_fopdata = NULL;
2290
2291 return (0);
2292 }
2293
2294 /* ARGSUSED */
2295 static void
vn_cache_destructor(void * buf,void * cdrarg)2296 vn_cache_destructor(void *buf, void *cdrarg)
2297 {
2298 struct vnode *vp;
2299
2300 vp = buf;
2301
2302 rw_destroy(&vp->v_nbllock);
2303 cv_destroy(&vp->v_cv);
2304 mutex_destroy(&vp->v_vsd_lock);
2305 mutex_destroy(&vp->v_lock);
2306 }
2307
2308 void
vn_create_cache(void)2309 vn_create_cache(void)
2310 {
2311 /* LINTED */
2312 ASSERT((1 << VNODE_ALIGN_LOG2) ==
2313 P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2314 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2315 VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2316 NULL, 0);
2317 }
2318
2319 void
vn_destroy_cache(void)2320 vn_destroy_cache(void)
2321 {
2322 kmem_cache_destroy(vn_cache);
2323 }
2324
2325 /*
2326 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2327 * cached by the file system and vnodes remain associated.
2328 */
2329 void
vn_recycle(vnode_t * vp)2330 vn_recycle(vnode_t *vp)
2331 {
2332 ASSERT(vp->v_pages == NULL);
2333
2334 /*
2335 * XXX - This really belongs in vn_reinit(), but we have some issues
2336 * with the counts. Best to have it here for clean initialization.
2337 */
2338 vp->v_rdcnt = 0;
2339 vp->v_wrcnt = 0;
2340 vp->v_mmap_read = 0;
2341 vp->v_mmap_write = 0;
2342
2343 /*
2344 * If FEM was in use, make sure everything gets cleaned up
2345 * NOTE: vp->v_femhead is initialized to NULL in the vnode
2346 * constructor.
2347 */
2348 if (vp->v_femhead) {
2349 /* XXX - There should be a free_femhead() that does all this */
2350 ASSERT(vp->v_femhead->femh_list == NULL);
2351 mutex_destroy(&vp->v_femhead->femh_lock);
2352 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2353 vp->v_femhead = NULL;
2354 }
2355 if (vp->v_path) {
2356 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2357 vp->v_path = NULL;
2358 }
2359
2360 if (vp->v_fopdata != NULL) {
2361 free_fopdata(vp);
2362 }
2363 vp->v_mpssdata = NULL;
2364 vsd_free(vp);
2365 }
2366
2367 /*
2368 * Used to reset the vnode fields including those that are directly accessible
2369 * as well as those which require an accessor function.
2370 *
2371 * Does not initialize:
2372 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2373 * v_data (since FS-nodes and vnodes point to each other and should
2374 * be updated simultaneously)
2375 * v_op (in case someone needs to make a VOP call on this object)
2376 */
2377 void
vn_reinit(vnode_t * vp)2378 vn_reinit(vnode_t *vp)
2379 {
2380 vp->v_count = 1;
2381 vp->v_count_dnlc = 0;
2382 vp->v_vfsp = NULL;
2383 vp->v_stream = NULL;
2384 vp->v_vfsmountedhere = NULL;
2385 vp->v_flag = 0;
2386 vp->v_type = VNON;
2387 vp->v_rdev = NODEV;
2388
2389 vp->v_filocks = NULL;
2390 vp->v_shrlocks = NULL;
2391 vp->v_pages = NULL;
2392
2393 vp->v_locality = NULL;
2394 vp->v_xattrdir = NULL;
2395
2396 /* Handles v_femhead, v_path, and the r/w/map counts */
2397 vn_recycle(vp);
2398 }
2399
2400 vnode_t *
vn_alloc(int kmflag)2401 vn_alloc(int kmflag)
2402 {
2403 vnode_t *vp;
2404
2405 vp = kmem_cache_alloc(vn_cache, kmflag);
2406
2407 if (vp != NULL) {
2408 vp->v_femhead = NULL; /* Must be done before vn_reinit() */
2409 vp->v_fopdata = NULL;
2410 vn_reinit(vp);
2411 }
2412
2413 return (vp);
2414 }
2415
2416 void
vn_free(vnode_t * vp)2417 vn_free(vnode_t *vp)
2418 {
2419 ASSERT(vp->v_shrlocks == NULL);
2420 ASSERT(vp->v_filocks == NULL);
2421
2422 /*
2423 * Some file systems call vn_free() with v_count of zero,
2424 * some with v_count of 1. In any case, the value should
2425 * never be anything else.
2426 */
2427 ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2428 ASSERT(vp->v_count_dnlc == 0);
2429 if (vp->v_path != NULL) {
2430 kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2431 vp->v_path = NULL;
2432 }
2433
2434 /* If FEM was in use, make sure everything gets cleaned up */
2435 if (vp->v_femhead) {
2436 /* XXX - There should be a free_femhead() that does all this */
2437 ASSERT(vp->v_femhead->femh_list == NULL);
2438 mutex_destroy(&vp->v_femhead->femh_lock);
2439 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2440 vp->v_femhead = NULL;
2441 }
2442
2443 if (vp->v_fopdata != NULL) {
2444 free_fopdata(vp);
2445 }
2446 vp->v_mpssdata = NULL;
2447 vsd_free(vp);
2448 kmem_cache_free(vn_cache, vp);
2449 }
2450
2451 /*
2452 * vnode status changes, should define better states than 1, 0.
2453 */
2454 void
vn_reclaim(vnode_t * vp)2455 vn_reclaim(vnode_t *vp)
2456 {
2457 vfs_t *vfsp = vp->v_vfsp;
2458
2459 if (vfsp == NULL ||
2460 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2461 return;
2462 }
2463 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2464 }
2465
2466 void
vn_idle(vnode_t * vp)2467 vn_idle(vnode_t *vp)
2468 {
2469 vfs_t *vfsp = vp->v_vfsp;
2470
2471 if (vfsp == NULL ||
2472 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2473 return;
2474 }
2475 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2476 }
2477 void
vn_exists(vnode_t * vp)2478 vn_exists(vnode_t *vp)
2479 {
2480 vfs_t *vfsp = vp->v_vfsp;
2481
2482 if (vfsp == NULL ||
2483 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2484 return;
2485 }
2486 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2487 }
2488
2489 void
vn_invalid(vnode_t * vp)2490 vn_invalid(vnode_t *vp)
2491 {
2492 vfs_t *vfsp = vp->v_vfsp;
2493
2494 if (vfsp == NULL ||
2495 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2496 return;
2497 }
2498 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2499 }
2500
2501 /* Vnode event notification */
2502
2503 int
vnevent_support(vnode_t * vp,caller_context_t * ct)2504 vnevent_support(vnode_t *vp, caller_context_t *ct)
2505 {
2506 if (vp == NULL)
2507 return (EINVAL);
2508
2509 return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2510 }
2511
2512 void
vnevent_rename_src(vnode_t * vp,vnode_t * dvp,char * name,caller_context_t * ct)2513 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2514 {
2515 if (vp == NULL || vp->v_femhead == NULL) {
2516 return;
2517 }
2518 (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2519 }
2520
2521 void
vnevent_rename_dest(vnode_t * vp,vnode_t * dvp,char * name,caller_context_t * ct)2522 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2523 caller_context_t *ct)
2524 {
2525 if (vp == NULL || vp->v_femhead == NULL) {
2526 return;
2527 }
2528 (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2529 }
2530
2531 void
vnevent_rename_dest_dir(vnode_t * vp,caller_context_t * ct)2532 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2533 {
2534 if (vp == NULL || vp->v_femhead == NULL) {
2535 return;
2536 }
2537 (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2538 }
2539
2540 void
vnevent_remove(vnode_t * vp,vnode_t * dvp,char * name,caller_context_t * ct)2541 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2542 {
2543 if (vp == NULL || vp->v_femhead == NULL) {
2544 return;
2545 }
2546 (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2547 }
2548
2549 void
vnevent_rmdir(vnode_t * vp,vnode_t * dvp,char * name,caller_context_t * ct)2550 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2551 {
2552 if (vp == NULL || vp->v_femhead == NULL) {
2553 return;
2554 }
2555 (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2556 }
2557
2558 void
vnevent_create(vnode_t * vp,caller_context_t * ct)2559 vnevent_create(vnode_t *vp, caller_context_t *ct)
2560 {
2561 if (vp == NULL || vp->v_femhead == NULL) {
2562 return;
2563 }
2564 (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2565 }
2566
2567 void
vnevent_link(vnode_t * vp,caller_context_t * ct)2568 vnevent_link(vnode_t *vp, caller_context_t *ct)
2569 {
2570 if (vp == NULL || vp->v_femhead == NULL) {
2571 return;
2572 }
2573 (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2574 }
2575
2576 void
vnevent_mountedover(vnode_t * vp,caller_context_t * ct)2577 vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2578 {
2579 if (vp == NULL || vp->v_femhead == NULL) {
2580 return;
2581 }
2582 (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2583 }
2584
2585 /*
2586 * Vnode accessors.
2587 */
2588
2589 int
vn_is_readonly(vnode_t * vp)2590 vn_is_readonly(vnode_t *vp)
2591 {
2592 return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2593 }
2594
2595 int
vn_has_flocks(vnode_t * vp)2596 vn_has_flocks(vnode_t *vp)
2597 {
2598 return (vp->v_filocks != NULL);
2599 }
2600
2601 int
vn_has_mandatory_locks(vnode_t * vp,int mode)2602 vn_has_mandatory_locks(vnode_t *vp, int mode)
2603 {
2604 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2605 }
2606
2607 int
vn_has_cached_data(vnode_t * vp)2608 vn_has_cached_data(vnode_t *vp)
2609 {
2610 return (vp->v_pages != NULL);
2611 }
2612
2613 /*
2614 * Return 0 if the vnode in question shouldn't be permitted into a zone via
2615 * zone_enter(2).
2616 */
2617 int
vn_can_change_zones(vnode_t * vp)2618 vn_can_change_zones(vnode_t *vp)
2619 {
2620 struct vfssw *vswp;
2621 int allow = 1;
2622 vnode_t *rvp;
2623
2624 if (nfs_global_client_only != 0)
2625 return (1);
2626
2627 /*
2628 * We always want to look at the underlying vnode if there is one.
2629 */
2630 if (VOP_REALVP(vp, &rvp, NULL) != 0)
2631 rvp = vp;
2632 /*
2633 * Some pseudo filesystems (including doorfs) don't actually register
2634 * their vfsops_t, so the following may return NULL; we happily let
2635 * such vnodes switch zones.
2636 */
2637 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2638 if (vswp != NULL) {
2639 if (vswp->vsw_flag & VSW_NOTZONESAFE)
2640 allow = 0;
2641 vfs_unrefvfssw(vswp);
2642 }
2643 return (allow);
2644 }
2645
2646 /*
2647 * Return nonzero if the vnode is a mount point, zero if not.
2648 */
2649 int
vn_ismntpt(vnode_t * vp)2650 vn_ismntpt(vnode_t *vp)
2651 {
2652 return (vp->v_vfsmountedhere != NULL);
2653 }
2654
2655 /* Retrieve the vfs (if any) mounted on this vnode */
2656 vfs_t *
vn_mountedvfs(vnode_t * vp)2657 vn_mountedvfs(vnode_t *vp)
2658 {
2659 return (vp->v_vfsmountedhere);
2660 }
2661
2662 /*
2663 * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2664 */
2665 int
vn_in_dnlc(vnode_t * vp)2666 vn_in_dnlc(vnode_t *vp)
2667 {
2668 return (vp->v_count_dnlc > 0);
2669 }
2670
2671 /*
2672 * vn_has_other_opens() checks whether a particular file is opened by more than
2673 * just the caller and whether the open is for read and/or write.
2674 * This routine is for calling after the caller has already called VOP_OPEN()
2675 * and the caller wishes to know if they are the only one with it open for
2676 * the mode(s) specified.
2677 *
2678 * Vnode counts are only kept on regular files (v_type=VREG).
2679 */
2680 int
vn_has_other_opens(vnode_t * vp,v_mode_t mode)2681 vn_has_other_opens(
2682 vnode_t *vp,
2683 v_mode_t mode)
2684 {
2685
2686 ASSERT(vp != NULL);
2687
2688 switch (mode) {
2689 case V_WRITE:
2690 if (vp->v_wrcnt > 1)
2691 return (V_TRUE);
2692 break;
2693 case V_RDORWR:
2694 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2695 return (V_TRUE);
2696 break;
2697 case V_RDANDWR:
2698 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2699 return (V_TRUE);
2700 break;
2701 case V_READ:
2702 if (vp->v_rdcnt > 1)
2703 return (V_TRUE);
2704 break;
2705 }
2706
2707 return (V_FALSE);
2708 }
2709
2710 /*
2711 * vn_is_opened() checks whether a particular file is opened and
2712 * whether the open is for read and/or write.
2713 *
2714 * Vnode counts are only kept on regular files (v_type=VREG).
2715 */
2716 int
vn_is_opened(vnode_t * vp,v_mode_t mode)2717 vn_is_opened(
2718 vnode_t *vp,
2719 v_mode_t mode)
2720 {
2721
2722 ASSERT(vp != NULL);
2723
2724 switch (mode) {
2725 case V_WRITE:
2726 if (vp->v_wrcnt)
2727 return (V_TRUE);
2728 break;
2729 case V_RDANDWR:
2730 if (vp->v_rdcnt && vp->v_wrcnt)
2731 return (V_TRUE);
2732 break;
2733 case V_RDORWR:
2734 if (vp->v_rdcnt || vp->v_wrcnt)
2735 return (V_TRUE);
2736 break;
2737 case V_READ:
2738 if (vp->v_rdcnt)
2739 return (V_TRUE);
2740 break;
2741 }
2742
2743 return (V_FALSE);
2744 }
2745
2746 /*
2747 * vn_is_mapped() checks whether a particular file is mapped and whether
2748 * the file is mapped read and/or write.
2749 */
2750 int
vn_is_mapped(vnode_t * vp,v_mode_t mode)2751 vn_is_mapped(
2752 vnode_t *vp,
2753 v_mode_t mode)
2754 {
2755
2756 ASSERT(vp != NULL);
2757
2758 #if !defined(_LP64)
2759 switch (mode) {
2760 /*
2761 * The atomic_add_64_nv functions force atomicity in the
2762 * case of 32 bit architectures. Otherwise the 64 bit values
2763 * require two fetches. The value of the fields may be
2764 * (potentially) changed between the first fetch and the
2765 * second
2766 */
2767 case V_WRITE:
2768 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2769 return (V_TRUE);
2770 break;
2771 case V_RDANDWR:
2772 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2773 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2774 return (V_TRUE);
2775 break;
2776 case V_RDORWR:
2777 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2778 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2779 return (V_TRUE);
2780 break;
2781 case V_READ:
2782 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2783 return (V_TRUE);
2784 break;
2785 }
2786 #else
2787 switch (mode) {
2788 case V_WRITE:
2789 if (vp->v_mmap_write)
2790 return (V_TRUE);
2791 break;
2792 case V_RDANDWR:
2793 if (vp->v_mmap_read && vp->v_mmap_write)
2794 return (V_TRUE);
2795 break;
2796 case V_RDORWR:
2797 if (vp->v_mmap_read || vp->v_mmap_write)
2798 return (V_TRUE);
2799 break;
2800 case V_READ:
2801 if (vp->v_mmap_read)
2802 return (V_TRUE);
2803 break;
2804 }
2805 #endif
2806
2807 return (V_FALSE);
2808 }
2809
2810 /*
2811 * Set the operations vector for a vnode.
2812 *
2813 * FEM ensures that the v_femhead pointer is filled in before the
2814 * v_op pointer is changed. This means that if the v_femhead pointer
2815 * is NULL, and the v_op field hasn't changed since before which checked
2816 * the v_femhead pointer; then our update is ok - we are not racing with
2817 * FEM.
2818 */
2819 void
vn_setops(vnode_t * vp,vnodeops_t * vnodeops)2820 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2821 {
2822 vnodeops_t *op;
2823
2824 ASSERT(vp != NULL);
2825 ASSERT(vnodeops != NULL);
2826
2827 op = vp->v_op;
2828 membar_consumer();
2829 /*
2830 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2831 * compare-and-swap on vp->v_op. If either fails, then FEM is
2832 * in effect on the vnode and we need to have FEM deal with it.
2833 */
2834 if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2835 fem_setvnops(vp, vnodeops);
2836 }
2837 }
2838
2839 /*
2840 * Retrieve the operations vector for a vnode
2841 * As with vn_setops(above); make sure we aren't racing with FEM.
2842 * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2843 * make sense to the callers of this routine.
2844 */
2845 vnodeops_t *
vn_getops(vnode_t * vp)2846 vn_getops(vnode_t *vp)
2847 {
2848 vnodeops_t *op;
2849
2850 ASSERT(vp != NULL);
2851
2852 op = vp->v_op;
2853 membar_consumer();
2854 if (vp->v_femhead == NULL && op == vp->v_op) {
2855 return (op);
2856 } else {
2857 return (fem_getvnops(vp));
2858 }
2859 }
2860
2861 /*
2862 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2863 * Returns zero (0) if not.
2864 */
2865 int
vn_matchops(vnode_t * vp,vnodeops_t * vnodeops)2866 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2867 {
2868 return (vn_getops(vp) == vnodeops);
2869 }
2870
2871 /*
2872 * Returns non-zero (1) if the specified operation matches the
2873 * corresponding operation for that the vnode.
2874 * Returns zero (0) if not.
2875 */
2876
2877 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2878
2879 int
vn_matchopval(vnode_t * vp,char * vopname,fs_generic_func_p funcp)2880 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2881 {
2882 const fs_operation_trans_def_t *otdp;
2883 fs_generic_func_p *loc = NULL;
2884 vnodeops_t *vop = vn_getops(vp);
2885
2886 ASSERT(vopname != NULL);
2887
2888 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2889 if (MATCHNAME(otdp->name, vopname)) {
2890 loc = (fs_generic_func_p *)
2891 ((char *)(vop) + otdp->offset);
2892 break;
2893 }
2894 }
2895
2896 return ((loc != NULL) && (*loc == funcp));
2897 }
2898
2899 /*
2900 * fs_new_caller_id() needs to return a unique ID on a given local system.
2901 * The IDs do not need to survive across reboots. These are primarily
2902 * used so that (FEM) monitors can detect particular callers (such as
2903 * the NFS server) to a given vnode/vfs operation.
2904 */
2905 u_longlong_t
fs_new_caller_id()2906 fs_new_caller_id()
2907 {
2908 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2909
2910 return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2911 }
2912
2913 /*
2914 * Given a starting vnode and a path, updates the path in the target vnode in
2915 * a safe manner. If the vnode already has path information embedded, then the
2916 * cached path is left untouched.
2917 */
2918
2919 size_t max_vnode_path = 4 * MAXPATHLEN;
2920
2921 void
vn_setpath(vnode_t * rootvp,struct vnode * startvp,struct vnode * vp,const char * path,size_t plen)2922 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2923 const char *path, size_t plen)
2924 {
2925 char *rpath;
2926 vnode_t *base;
2927 size_t rpathlen, rpathalloc;
2928 int doslash = 1;
2929
2930 if (*path == '/') {
2931 base = rootvp;
2932 path++;
2933 plen--;
2934 } else {
2935 base = startvp;
2936 }
2937
2938 /*
2939 * We cannot grab base->v_lock while we hold vp->v_lock because of
2940 * the potential for deadlock.
2941 */
2942 mutex_enter(&base->v_lock);
2943 if (base->v_path == NULL) {
2944 mutex_exit(&base->v_lock);
2945 return;
2946 }
2947
2948 rpathlen = strlen(base->v_path);
2949 rpathalloc = rpathlen + plen + 1;
2950 /* Avoid adding a slash if there's already one there */
2951 if (base->v_path[rpathlen-1] == '/')
2952 doslash = 0;
2953 else
2954 rpathalloc++;
2955
2956 /*
2957 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2958 * so we must do this dance. If, by chance, something changes the path,
2959 * just give up since there is no real harm.
2960 */
2961 mutex_exit(&base->v_lock);
2962
2963 /* Paths should stay within reason */
2964 if (rpathalloc > max_vnode_path)
2965 return;
2966
2967 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2968
2969 mutex_enter(&base->v_lock);
2970 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2971 mutex_exit(&base->v_lock);
2972 kmem_free(rpath, rpathalloc);
2973 return;
2974 }
2975 bcopy(base->v_path, rpath, rpathlen);
2976 mutex_exit(&base->v_lock);
2977
2978 if (doslash)
2979 rpath[rpathlen++] = '/';
2980 bcopy(path, rpath + rpathlen, plen);
2981 rpath[rpathlen + plen] = '\0';
2982
2983 mutex_enter(&vp->v_lock);
2984 if (vp->v_path != NULL) {
2985 mutex_exit(&vp->v_lock);
2986 kmem_free(rpath, rpathalloc);
2987 } else {
2988 vp->v_path = rpath;
2989 mutex_exit(&vp->v_lock);
2990 }
2991 }
2992
2993 /*
2994 * Sets the path to the vnode to be the given string, regardless of current
2995 * context. The string must be a complete path from rootdir. This is only used
2996 * by fsop_root() for setting the path based on the mountpoint.
2997 */
2998 void
vn_setpath_str(struct vnode * vp,const char * str,size_t len)2999 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3000 {
3001 char *buf = kmem_alloc(len + 1, KM_SLEEP);
3002
3003 mutex_enter(&vp->v_lock);
3004 if (vp->v_path != NULL) {
3005 mutex_exit(&vp->v_lock);
3006 kmem_free(buf, len + 1);
3007 return;
3008 }
3009
3010 vp->v_path = buf;
3011 bcopy(str, vp->v_path, len);
3012 vp->v_path[len] = '\0';
3013
3014 mutex_exit(&vp->v_lock);
3015 }
3016
3017 /*
3018 * Called from within filesystem's vop_rename() to handle renames once the
3019 * target vnode is available.
3020 */
3021 void
vn_renamepath(vnode_t * dvp,vnode_t * vp,const char * nm,size_t len)3022 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3023 {
3024 char *tmp;
3025
3026 mutex_enter(&vp->v_lock);
3027 tmp = vp->v_path;
3028 vp->v_path = NULL;
3029 mutex_exit(&vp->v_lock);
3030 vn_setpath(rootdir, dvp, vp, nm, len);
3031 if (tmp != NULL)
3032 kmem_free(tmp, strlen(tmp) + 1);
3033 }
3034
3035 /*
3036 * Similar to vn_setpath_str(), this function sets the path of the destination
3037 * vnode to the be the same as the source vnode.
3038 */
3039 void
vn_copypath(struct vnode * src,struct vnode * dst)3040 vn_copypath(struct vnode *src, struct vnode *dst)
3041 {
3042 char *buf;
3043 int alloc;
3044
3045 mutex_enter(&src->v_lock);
3046 if (src->v_path == NULL) {
3047 mutex_exit(&src->v_lock);
3048 return;
3049 }
3050 alloc = strlen(src->v_path) + 1;
3051
3052 /* avoid kmem_alloc() with lock held */
3053 mutex_exit(&src->v_lock);
3054 buf = kmem_alloc(alloc, KM_SLEEP);
3055 mutex_enter(&src->v_lock);
3056 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3057 mutex_exit(&src->v_lock);
3058 kmem_free(buf, alloc);
3059 return;
3060 }
3061 bcopy(src->v_path, buf, alloc);
3062 mutex_exit(&src->v_lock);
3063
3064 mutex_enter(&dst->v_lock);
3065 if (dst->v_path != NULL) {
3066 mutex_exit(&dst->v_lock);
3067 kmem_free(buf, alloc);
3068 return;
3069 }
3070 dst->v_path = buf;
3071 mutex_exit(&dst->v_lock);
3072 }
3073
3074 /*
3075 * XXX Private interface for segvn routines that handle vnode
3076 * large page segments.
3077 *
3078 * return 1 if vp's file system VOP_PAGEIO() implementation
3079 * can be safely used instead of VOP_GETPAGE() for handling
3080 * pagefaults against regular non swap files. VOP_PAGEIO()
3081 * interface is considered safe here if its implementation
3082 * is very close to VOP_GETPAGE() implementation.
3083 * e.g. It zero's out the part of the page beyond EOF. Doesn't
3084 * panic if there're file holes but instead returns an error.
3085 * Doesn't assume file won't be changed by user writes, etc.
3086 *
3087 * return 0 otherwise.
3088 *
3089 * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3090 */
3091 int
vn_vmpss_usepageio(vnode_t * vp)3092 vn_vmpss_usepageio(vnode_t *vp)
3093 {
3094 vfs_t *vfsp = vp->v_vfsp;
3095 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3096 char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3097 char **fsok = pageio_ok_fss;
3098
3099 if (fsname == NULL) {
3100 return (0);
3101 }
3102
3103 for (; *fsok; fsok++) {
3104 if (strcmp(*fsok, fsname) == 0) {
3105 return (1);
3106 }
3107 }
3108 return (0);
3109 }
3110
3111 /* VOP_XXX() macros call the corresponding fop_xxx() function */
3112
3113 int
fop_open(vnode_t ** vpp,int mode,cred_t * cr,caller_context_t * ct)3114 fop_open(
3115 vnode_t **vpp,
3116 int mode,
3117 cred_t *cr,
3118 caller_context_t *ct)
3119 {
3120 int ret;
3121 vnode_t *vp = *vpp;
3122
3123 VN_HOLD(vp);
3124 /*
3125 * Adding to the vnode counts before calling open
3126 * avoids the need for a mutex. It circumvents a race
3127 * condition where a query made on the vnode counts results in a
3128 * false negative. The inquirer goes away believing the file is
3129 * not open when there is an open on the file already under way.
3130 *
3131 * The counts are meant to prevent NFS from granting a delegation
3132 * when it would be dangerous to do so.
3133 *
3134 * The vnode counts are only kept on regular files
3135 */
3136 if ((*vpp)->v_type == VREG) {
3137 if (mode & FREAD)
3138 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3139 if (mode & FWRITE)
3140 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3141 }
3142
3143 VOPXID_MAP_CR(vp, cr);
3144
3145 ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
3146
3147 if (ret) {
3148 /*
3149 * Use the saved vp just in case the vnode ptr got trashed
3150 * by the error.
3151 */
3152 VOPSTATS_UPDATE(vp, open);
3153 if ((vp->v_type == VREG) && (mode & FREAD))
3154 atomic_add_32(&(vp->v_rdcnt), -1);
3155 if ((vp->v_type == VREG) && (mode & FWRITE))
3156 atomic_add_32(&(vp->v_wrcnt), -1);
3157 } else {
3158 /*
3159 * Some filesystems will return a different vnode,
3160 * but the same path was still used to open it.
3161 * So if we do change the vnode and need to
3162 * copy over the path, do so here, rather than special
3163 * casing each filesystem. Adjust the vnode counts to
3164 * reflect the vnode switch.
3165 */
3166 VOPSTATS_UPDATE(*vpp, open);
3167 if (*vpp != vp && *vpp != NULL) {
3168 vn_copypath(vp, *vpp);
3169 if (((*vpp)->v_type == VREG) && (mode & FREAD))
3170 atomic_add_32(&((*vpp)->v_rdcnt), 1);
3171 if ((vp->v_type == VREG) && (mode & FREAD))
3172 atomic_add_32(&(vp->v_rdcnt), -1);
3173 if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3174 atomic_add_32(&((*vpp)->v_wrcnt), 1);
3175 if ((vp->v_type == VREG) && (mode & FWRITE))
3176 atomic_add_32(&(vp->v_wrcnt), -1);
3177 }
3178 }
3179 VN_RELE(vp);
3180 return (ret);
3181 }
3182
3183 int
fop_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr,caller_context_t * ct)3184 fop_close(
3185 vnode_t *vp,
3186 int flag,
3187 int count,
3188 offset_t offset,
3189 cred_t *cr,
3190 caller_context_t *ct)
3191 {
3192 int err;
3193
3194 VOPXID_MAP_CR(vp, cr);
3195
3196 err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3197 VOPSTATS_UPDATE(vp, close);
3198 /*
3199 * Check passed in count to handle possible dups. Vnode counts are only
3200 * kept on regular files
3201 */
3202 if ((vp->v_type == VREG) && (count == 1)) {
3203 if (flag & FREAD) {
3204 ASSERT(vp->v_rdcnt > 0);
3205 atomic_add_32(&(vp->v_rdcnt), -1);
3206 }
3207 if (flag & FWRITE) {
3208 ASSERT(vp->v_wrcnt > 0);
3209 atomic_add_32(&(vp->v_wrcnt), -1);
3210 }
3211 }
3212 return (err);
3213 }
3214
3215 int
fop_read(vnode_t * vp,uio_t * uiop,int ioflag,cred_t * cr,caller_context_t * ct)3216 fop_read(
3217 vnode_t *vp,
3218 uio_t *uiop,
3219 int ioflag,
3220 cred_t *cr,
3221 caller_context_t *ct)
3222 {
3223 int err;
3224 ssize_t resid_start = uiop->uio_resid;
3225
3226 VOPXID_MAP_CR(vp, cr);
3227
3228 err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3229 VOPSTATS_UPDATE_IO(vp, read,
3230 read_bytes, (resid_start - uiop->uio_resid));
3231 return (err);
3232 }
3233
3234 int
fop_write(vnode_t * vp,uio_t * uiop,int ioflag,cred_t * cr,caller_context_t * ct)3235 fop_write(
3236 vnode_t *vp,
3237 uio_t *uiop,
3238 int ioflag,
3239 cred_t *cr,
3240 caller_context_t *ct)
3241 {
3242 int err;
3243 ssize_t resid_start = uiop->uio_resid;
3244
3245 VOPXID_MAP_CR(vp, cr);
3246
3247 err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3248 VOPSTATS_UPDATE_IO(vp, write,
3249 write_bytes, (resid_start - uiop->uio_resid));
3250 return (err);
3251 }
3252
3253 int
fop_ioctl(vnode_t * vp,int cmd,intptr_t arg,int flag,cred_t * cr,int * rvalp,caller_context_t * ct)3254 fop_ioctl(
3255 vnode_t *vp,
3256 int cmd,
3257 intptr_t arg,
3258 int flag,
3259 cred_t *cr,
3260 int *rvalp,
3261 caller_context_t *ct)
3262 {
3263 int err;
3264
3265 VOPXID_MAP_CR(vp, cr);
3266
3267 err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3268 VOPSTATS_UPDATE(vp, ioctl);
3269 return (err);
3270 }
3271
3272 int
fop_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)3273 fop_setfl(
3274 vnode_t *vp,
3275 int oflags,
3276 int nflags,
3277 cred_t *cr,
3278 caller_context_t *ct)
3279 {
3280 int err;
3281
3282 VOPXID_MAP_CR(vp, cr);
3283
3284 err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3285 VOPSTATS_UPDATE(vp, setfl);
3286 return (err);
3287 }
3288
3289 int
fop_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr,caller_context_t * ct)3290 fop_getattr(
3291 vnode_t *vp,
3292 vattr_t *vap,
3293 int flags,
3294 cred_t *cr,
3295 caller_context_t *ct)
3296 {
3297 int err;
3298
3299 VOPXID_MAP_CR(vp, cr);
3300
3301 /*
3302 * If this file system doesn't understand the xvattr extensions
3303 * then turn off the xvattr bit.
3304 */
3305 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3306 vap->va_mask &= ~AT_XVATTR;
3307 }
3308
3309 /*
3310 * We're only allowed to skip the ACL check iff we used a 32 bit
3311 * ACE mask with VOP_ACCESS() to determine permissions.
3312 */
3313 if ((flags & ATTR_NOACLCHECK) &&
3314 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3315 return (EINVAL);
3316 }
3317 err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3318 VOPSTATS_UPDATE(vp, getattr);
3319 return (err);
3320 }
3321
3322 int
fop_setattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr,caller_context_t * ct)3323 fop_setattr(
3324 vnode_t *vp,
3325 vattr_t *vap,
3326 int flags,
3327 cred_t *cr,
3328 caller_context_t *ct)
3329 {
3330 int err;
3331
3332 VOPXID_MAP_CR(vp, cr);
3333
3334 /*
3335 * If this file system doesn't understand the xvattr extensions
3336 * then turn off the xvattr bit.
3337 */
3338 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3339 vap->va_mask &= ~AT_XVATTR;
3340 }
3341
3342 /*
3343 * We're only allowed to skip the ACL check iff we used a 32 bit
3344 * ACE mask with VOP_ACCESS() to determine permissions.
3345 */
3346 if ((flags & ATTR_NOACLCHECK) &&
3347 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3348 return (EINVAL);
3349 }
3350 err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3351 VOPSTATS_UPDATE(vp, setattr);
3352 return (err);
3353 }
3354
3355 int
fop_access(vnode_t * vp,int mode,int flags,cred_t * cr,caller_context_t * ct)3356 fop_access(
3357 vnode_t *vp,
3358 int mode,
3359 int flags,
3360 cred_t *cr,
3361 caller_context_t *ct)
3362 {
3363 int err;
3364
3365 if ((flags & V_ACE_MASK) &&
3366 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3367 return (EINVAL);
3368 }
3369
3370 VOPXID_MAP_CR(vp, cr);
3371
3372 err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3373 VOPSTATS_UPDATE(vp, access);
3374 return (err);
3375 }
3376
3377 int
fop_lookup(vnode_t * dvp,char * nm,vnode_t ** vpp,pathname_t * pnp,int flags,vnode_t * rdir,cred_t * cr,caller_context_t * ct,int * deflags,pathname_t * ppnp)3378 fop_lookup(
3379 vnode_t *dvp,
3380 char *nm,
3381 vnode_t **vpp,
3382 pathname_t *pnp,
3383 int flags,
3384 vnode_t *rdir,
3385 cred_t *cr,
3386 caller_context_t *ct,
3387 int *deflags, /* Returned per-dirent flags */
3388 pathname_t *ppnp) /* Returned case-preserved name in directory */
3389 {
3390 int ret;
3391
3392 /*
3393 * If this file system doesn't support case-insensitive access
3394 * and said access is requested, fail quickly. It is required
3395 * that if the vfs supports case-insensitive lookup, it also
3396 * supports extended dirent flags.
3397 */
3398 if (flags & FIGNORECASE &&
3399 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3400 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3401 return (EINVAL);
3402
3403 VOPXID_MAP_CR(dvp, cr);
3404
3405 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3406 ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3407 } else {
3408 ret = (*(dvp)->v_op->vop_lookup)
3409 (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3410 }
3411 if (ret == 0 && *vpp) {
3412 VOPSTATS_UPDATE(*vpp, lookup);
3413 if ((*vpp)->v_path == NULL) {
3414 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3415 }
3416 }
3417
3418 return (ret);
3419 }
3420
3421 int
fop_create(vnode_t * dvp,char * name,vattr_t * vap,vcexcl_t excl,int mode,vnode_t ** vpp,cred_t * cr,int flags,caller_context_t * ct,vsecattr_t * vsecp)3422 fop_create(
3423 vnode_t *dvp,
3424 char *name,
3425 vattr_t *vap,
3426 vcexcl_t excl,
3427 int mode,
3428 vnode_t **vpp,
3429 cred_t *cr,
3430 int flags,
3431 caller_context_t *ct,
3432 vsecattr_t *vsecp) /* ACL to set during create */
3433 {
3434 int ret;
3435
3436 if (vsecp != NULL &&
3437 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3438 return (EINVAL);
3439 }
3440 /*
3441 * If this file system doesn't support case-insensitive access
3442 * and said access is requested, fail quickly.
3443 */
3444 if (flags & FIGNORECASE &&
3445 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3446 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3447 return (EINVAL);
3448
3449 VOPXID_MAP_CR(dvp, cr);
3450
3451 ret = (*(dvp)->v_op->vop_create)
3452 (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3453 if (ret == 0 && *vpp) {
3454 VOPSTATS_UPDATE(*vpp, create);
3455 if ((*vpp)->v_path == NULL) {
3456 vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3457 }
3458 }
3459
3460 return (ret);
3461 }
3462
3463 int
fop_remove(vnode_t * dvp,char * nm,cred_t * cr,caller_context_t * ct,int flags)3464 fop_remove(
3465 vnode_t *dvp,
3466 char *nm,
3467 cred_t *cr,
3468 caller_context_t *ct,
3469 int flags)
3470 {
3471 int err;
3472
3473 /*
3474 * If this file system doesn't support case-insensitive access
3475 * and said access is requested, fail quickly.
3476 */
3477 if (flags & FIGNORECASE &&
3478 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3479 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3480 return (EINVAL);
3481
3482 VOPXID_MAP_CR(dvp, cr);
3483
3484 err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3485 VOPSTATS_UPDATE(dvp, remove);
3486 return (err);
3487 }
3488
3489 int
fop_link(vnode_t * tdvp,vnode_t * svp,char * tnm,cred_t * cr,caller_context_t * ct,int flags)3490 fop_link(
3491 vnode_t *tdvp,
3492 vnode_t *svp,
3493 char *tnm,
3494 cred_t *cr,
3495 caller_context_t *ct,
3496 int flags)
3497 {
3498 int err;
3499
3500 /*
3501 * If the target file system doesn't support case-insensitive access
3502 * and said access is requested, fail quickly.
3503 */
3504 if (flags & FIGNORECASE &&
3505 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3506 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3507 return (EINVAL);
3508
3509 VOPXID_MAP_CR(tdvp, cr);
3510
3511 err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3512 VOPSTATS_UPDATE(tdvp, link);
3513 return (err);
3514 }
3515
3516 int
fop_rename(vnode_t * sdvp,char * snm,vnode_t * tdvp,char * tnm,cred_t * cr,caller_context_t * ct,int flags)3517 fop_rename(
3518 vnode_t *sdvp,
3519 char *snm,
3520 vnode_t *tdvp,
3521 char *tnm,
3522 cred_t *cr,
3523 caller_context_t *ct,
3524 int flags)
3525 {
3526 int err;
3527
3528 /*
3529 * If the file system involved does not support
3530 * case-insensitive access and said access is requested, fail
3531 * quickly.
3532 */
3533 if (flags & FIGNORECASE &&
3534 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3535 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3536 return (EINVAL);
3537
3538 VOPXID_MAP_CR(tdvp, cr);
3539
3540 err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3541 VOPSTATS_UPDATE(sdvp, rename);
3542 return (err);
3543 }
3544
3545 int
fop_mkdir(vnode_t * dvp,char * dirname,vattr_t * vap,vnode_t ** vpp,cred_t * cr,caller_context_t * ct,int flags,vsecattr_t * vsecp)3546 fop_mkdir(
3547 vnode_t *dvp,
3548 char *dirname,
3549 vattr_t *vap,
3550 vnode_t **vpp,
3551 cred_t *cr,
3552 caller_context_t *ct,
3553 int flags,
3554 vsecattr_t *vsecp) /* ACL to set during create */
3555 {
3556 int ret;
3557
3558 if (vsecp != NULL &&
3559 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3560 return (EINVAL);
3561 }
3562 /*
3563 * If this file system doesn't support case-insensitive access
3564 * and said access is requested, fail quickly.
3565 */
3566 if (flags & FIGNORECASE &&
3567 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3568 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3569 return (EINVAL);
3570
3571 VOPXID_MAP_CR(dvp, cr);
3572
3573 ret = (*(dvp)->v_op->vop_mkdir)
3574 (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3575 if (ret == 0 && *vpp) {
3576 VOPSTATS_UPDATE(*vpp, mkdir);
3577 if ((*vpp)->v_path == NULL) {
3578 vn_setpath(rootdir, dvp, *vpp, dirname,
3579 strlen(dirname));
3580 }
3581 }
3582
3583 return (ret);
3584 }
3585
3586 int
fop_rmdir(vnode_t * dvp,char * nm,vnode_t * cdir,cred_t * cr,caller_context_t * ct,int flags)3587 fop_rmdir(
3588 vnode_t *dvp,
3589 char *nm,
3590 vnode_t *cdir,
3591 cred_t *cr,
3592 caller_context_t *ct,
3593 int flags)
3594 {
3595 int err;
3596
3597 /*
3598 * If this file system doesn't support case-insensitive access
3599 * and said access is requested, fail quickly.
3600 */
3601 if (flags & FIGNORECASE &&
3602 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3603 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3604 return (EINVAL);
3605
3606 VOPXID_MAP_CR(dvp, cr);
3607
3608 err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3609 VOPSTATS_UPDATE(dvp, rmdir);
3610 return (err);
3611 }
3612
3613 int
fop_readdir(vnode_t * vp,uio_t * uiop,cred_t * cr,int * eofp,caller_context_t * ct,int flags)3614 fop_readdir(
3615 vnode_t *vp,
3616 uio_t *uiop,
3617 cred_t *cr,
3618 int *eofp,
3619 caller_context_t *ct,
3620 int flags)
3621 {
3622 int err;
3623 ssize_t resid_start = uiop->uio_resid;
3624
3625 /*
3626 * If this file system doesn't support retrieving directory
3627 * entry flags and said access is requested, fail quickly.
3628 */
3629 if (flags & V_RDDIR_ENTFLAGS &&
3630 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3631 return (EINVAL);
3632
3633 VOPXID_MAP_CR(vp, cr);
3634
3635 err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3636 VOPSTATS_UPDATE_IO(vp, readdir,
3637 readdir_bytes, (resid_start - uiop->uio_resid));
3638 return (err);
3639 }
3640
3641 int
fop_symlink(vnode_t * dvp,char * linkname,vattr_t * vap,char * target,cred_t * cr,caller_context_t * ct,int flags)3642 fop_symlink(
3643 vnode_t *dvp,
3644 char *linkname,
3645 vattr_t *vap,
3646 char *target,
3647 cred_t *cr,
3648 caller_context_t *ct,
3649 int flags)
3650 {
3651 int err;
3652 xvattr_t xvattr;
3653
3654 /*
3655 * If this file system doesn't support case-insensitive access
3656 * and said access is requested, fail quickly.
3657 */
3658 if (flags & FIGNORECASE &&
3659 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3660 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3661 return (EINVAL);
3662
3663 VOPXID_MAP_CR(dvp, cr);
3664
3665 /* check for reparse point */
3666 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3667 (strncmp(target, FS_REPARSE_TAG_STR,
3668 strlen(FS_REPARSE_TAG_STR)) == 0)) {
3669 if (!fs_reparse_mark(target, vap, &xvattr))
3670 vap = (vattr_t *)&xvattr;
3671 }
3672
3673 err = (*(dvp)->v_op->vop_symlink)
3674 (dvp, linkname, vap, target, cr, ct, flags);
3675 VOPSTATS_UPDATE(dvp, symlink);
3676 return (err);
3677 }
3678
3679 int
fop_readlink(vnode_t * vp,uio_t * uiop,cred_t * cr,caller_context_t * ct)3680 fop_readlink(
3681 vnode_t *vp,
3682 uio_t *uiop,
3683 cred_t *cr,
3684 caller_context_t *ct)
3685 {
3686 int err;
3687
3688 VOPXID_MAP_CR(vp, cr);
3689
3690 err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3691 VOPSTATS_UPDATE(vp, readlink);
3692 return (err);
3693 }
3694
3695 int
fop_fsync(vnode_t * vp,int syncflag,cred_t * cr,caller_context_t * ct)3696 fop_fsync(
3697 vnode_t *vp,
3698 int syncflag,
3699 cred_t *cr,
3700 caller_context_t *ct)
3701 {
3702 int err;
3703
3704 VOPXID_MAP_CR(vp, cr);
3705
3706 err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3707 VOPSTATS_UPDATE(vp, fsync);
3708 return (err);
3709 }
3710
3711 void
fop_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)3712 fop_inactive(
3713 vnode_t *vp,
3714 cred_t *cr,
3715 caller_context_t *ct)
3716 {
3717 /* Need to update stats before vop call since we may lose the vnode */
3718 VOPSTATS_UPDATE(vp, inactive);
3719
3720 VOPXID_MAP_CR(vp, cr);
3721
3722 (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3723 }
3724
3725 int
fop_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)3726 fop_fid(
3727 vnode_t *vp,
3728 fid_t *fidp,
3729 caller_context_t *ct)
3730 {
3731 int err;
3732
3733 err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3734 VOPSTATS_UPDATE(vp, fid);
3735 return (err);
3736 }
3737
3738 int
fop_rwlock(vnode_t * vp,int write_lock,caller_context_t * ct)3739 fop_rwlock(
3740 vnode_t *vp,
3741 int write_lock,
3742 caller_context_t *ct)
3743 {
3744 int ret;
3745
3746 ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3747 VOPSTATS_UPDATE(vp, rwlock);
3748 return (ret);
3749 }
3750
3751 void
fop_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ct)3752 fop_rwunlock(
3753 vnode_t *vp,
3754 int write_lock,
3755 caller_context_t *ct)
3756 {
3757 (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3758 VOPSTATS_UPDATE(vp, rwunlock);
3759 }
3760
3761 int
fop_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)3762 fop_seek(
3763 vnode_t *vp,
3764 offset_t ooff,
3765 offset_t *noffp,
3766 caller_context_t *ct)
3767 {
3768 int err;
3769
3770 err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
3771 VOPSTATS_UPDATE(vp, seek);
3772 return (err);
3773 }
3774
3775 int
fop_cmp(vnode_t * vp1,vnode_t * vp2,caller_context_t * ct)3776 fop_cmp(
3777 vnode_t *vp1,
3778 vnode_t *vp2,
3779 caller_context_t *ct)
3780 {
3781 int err;
3782
3783 err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
3784 VOPSTATS_UPDATE(vp1, cmp);
3785 return (err);
3786 }
3787
3788 int
fop_frlock(vnode_t * vp,int cmd,flock64_t * bfp,int flag,offset_t offset,struct flk_callback * flk_cbp,cred_t * cr,caller_context_t * ct)3789 fop_frlock(
3790 vnode_t *vp,
3791 int cmd,
3792 flock64_t *bfp,
3793 int flag,
3794 offset_t offset,
3795 struct flk_callback *flk_cbp,
3796 cred_t *cr,
3797 caller_context_t *ct)
3798 {
3799 int err;
3800
3801 VOPXID_MAP_CR(vp, cr);
3802
3803 err = (*(vp)->v_op->vop_frlock)
3804 (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3805 VOPSTATS_UPDATE(vp, frlock);
3806 return (err);
3807 }
3808
3809 int
fop_space(vnode_t * vp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr,caller_context_t * ct)3810 fop_space(
3811 vnode_t *vp,
3812 int cmd,
3813 flock64_t *bfp,
3814 int flag,
3815 offset_t offset,
3816 cred_t *cr,
3817 caller_context_t *ct)
3818 {
3819 int err;
3820
3821 VOPXID_MAP_CR(vp, cr);
3822
3823 err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
3824 VOPSTATS_UPDATE(vp, space);
3825 return (err);
3826 }
3827
3828 int
fop_realvp(vnode_t * vp,vnode_t ** vpp,caller_context_t * ct)3829 fop_realvp(
3830 vnode_t *vp,
3831 vnode_t **vpp,
3832 caller_context_t *ct)
3833 {
3834 int err;
3835
3836 err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
3837 VOPSTATS_UPDATE(vp, realvp);
3838 return (err);
3839 }
3840
3841 int
fop_getpage(vnode_t * vp,offset_t off,size_t len,uint_t * protp,page_t ** plarr,size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,cred_t * cr,caller_context_t * ct)3842 fop_getpage(
3843 vnode_t *vp,
3844 offset_t off,
3845 size_t len,
3846 uint_t *protp,
3847 page_t **plarr,
3848 size_t plsz,
3849 struct seg *seg,
3850 caddr_t addr,
3851 enum seg_rw rw,
3852 cred_t *cr,
3853 caller_context_t *ct)
3854 {
3855 int err;
3856
3857 VOPXID_MAP_CR(vp, cr);
3858
3859 err = (*(vp)->v_op->vop_getpage)
3860 (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
3861 VOPSTATS_UPDATE(vp, getpage);
3862 return (err);
3863 }
3864
3865 int
fop_putpage(vnode_t * vp,offset_t off,size_t len,int flags,cred_t * cr,caller_context_t * ct)3866 fop_putpage(
3867 vnode_t *vp,
3868 offset_t off,
3869 size_t len,
3870 int flags,
3871 cred_t *cr,
3872 caller_context_t *ct)
3873 {
3874 int err;
3875
3876 VOPXID_MAP_CR(vp, cr);
3877
3878 err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
3879 VOPSTATS_UPDATE(vp, putpage);
3880 return (err);
3881 }
3882
3883 int
fop_map(vnode_t * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)3884 fop_map(
3885 vnode_t *vp,
3886 offset_t off,
3887 struct as *as,
3888 caddr_t *addrp,
3889 size_t len,
3890 uchar_t prot,
3891 uchar_t maxprot,
3892 uint_t flags,
3893 cred_t *cr,
3894 caller_context_t *ct)
3895 {
3896 int err;
3897
3898 VOPXID_MAP_CR(vp, cr);
3899
3900 err = (*(vp)->v_op->vop_map)
3901 (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
3902 VOPSTATS_UPDATE(vp, map);
3903 return (err);
3904 }
3905
3906 int
fop_addmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)3907 fop_addmap(
3908 vnode_t *vp,
3909 offset_t off,
3910 struct as *as,
3911 caddr_t addr,
3912 size_t len,
3913 uchar_t prot,
3914 uchar_t maxprot,
3915 uint_t flags,
3916 cred_t *cr,
3917 caller_context_t *ct)
3918 {
3919 int error;
3920 u_longlong_t delta;
3921
3922 VOPXID_MAP_CR(vp, cr);
3923
3924 error = (*(vp)->v_op->vop_addmap)
3925 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3926
3927 if ((!error) && (vp->v_type == VREG)) {
3928 delta = (u_longlong_t)btopr(len);
3929 /*
3930 * If file is declared MAP_PRIVATE, it can't be written back
3931 * even if open for write. Handle as read.
3932 */
3933 if (flags & MAP_PRIVATE) {
3934 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3935 (int64_t)delta);
3936 } else {
3937 /*
3938 * atomic_add_64 forces the fetch of a 64 bit value to
3939 * be atomic on 32 bit machines
3940 */
3941 if (maxprot & PROT_WRITE)
3942 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3943 (int64_t)delta);
3944 if (maxprot & PROT_READ)
3945 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3946 (int64_t)delta);
3947 if (maxprot & PROT_EXEC)
3948 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3949 (int64_t)delta);
3950 }
3951 }
3952 VOPSTATS_UPDATE(vp, addmap);
3953 return (error);
3954 }
3955
3956 int
fop_delmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)3957 fop_delmap(
3958 vnode_t *vp,
3959 offset_t off,
3960 struct as *as,
3961 caddr_t addr,
3962 size_t len,
3963 uint_t prot,
3964 uint_t maxprot,
3965 uint_t flags,
3966 cred_t *cr,
3967 caller_context_t *ct)
3968 {
3969 int error;
3970 u_longlong_t delta;
3971
3972 VOPXID_MAP_CR(vp, cr);
3973
3974 error = (*(vp)->v_op->vop_delmap)
3975 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3976
3977 /*
3978 * NFS calls into delmap twice, the first time
3979 * it simply establishes a callback mechanism and returns EAGAIN
3980 * while the real work is being done upon the second invocation.
3981 * We have to detect this here and only decrement the counts upon
3982 * the second delmap request.
3983 */
3984 if ((error != EAGAIN) && (vp->v_type == VREG)) {
3985
3986 delta = (u_longlong_t)btopr(len);
3987
3988 if (flags & MAP_PRIVATE) {
3989 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3990 (int64_t)(-delta));
3991 } else {
3992 /*
3993 * atomic_add_64 forces the fetch of a 64 bit value
3994 * to be atomic on 32 bit machines
3995 */
3996 if (maxprot & PROT_WRITE)
3997 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3998 (int64_t)(-delta));
3999 if (maxprot & PROT_READ)
4000 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4001 (int64_t)(-delta));
4002 if (maxprot & PROT_EXEC)
4003 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4004 (int64_t)(-delta));
4005 }
4006 }
4007 VOPSTATS_UPDATE(vp, delmap);
4008 return (error);
4009 }
4010
4011
4012 int
fop_poll(vnode_t * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)4013 fop_poll(
4014 vnode_t *vp,
4015 short events,
4016 int anyyet,
4017 short *reventsp,
4018 struct pollhead **phpp,
4019 caller_context_t *ct)
4020 {
4021 int err;
4022
4023 err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4024 VOPSTATS_UPDATE(vp, poll);
4025 return (err);
4026 }
4027
4028 int
fop_dump(vnode_t * vp,caddr_t addr,offset_t lbdn,offset_t dblks,caller_context_t * ct)4029 fop_dump(
4030 vnode_t *vp,
4031 caddr_t addr,
4032 offset_t lbdn,
4033 offset_t dblks,
4034 caller_context_t *ct)
4035 {
4036 int err;
4037
4038 /* ensure lbdn and dblks can be passed safely to bdev_dump */
4039 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4040 return (EIO);
4041
4042 err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
4043 VOPSTATS_UPDATE(vp, dump);
4044 return (err);
4045 }
4046
4047 int
fop_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)4048 fop_pathconf(
4049 vnode_t *vp,
4050 int cmd,
4051 ulong_t *valp,
4052 cred_t *cr,
4053 caller_context_t *ct)
4054 {
4055 int err;
4056
4057 VOPXID_MAP_CR(vp, cr);
4058
4059 err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4060 VOPSTATS_UPDATE(vp, pathconf);
4061 return (err);
4062 }
4063
4064 int
fop_pageio(vnode_t * vp,struct page * pp,u_offset_t io_off,size_t io_len,int flags,cred_t * cr,caller_context_t * ct)4065 fop_pageio(
4066 vnode_t *vp,
4067 struct page *pp,
4068 u_offset_t io_off,
4069 size_t io_len,
4070 int flags,
4071 cred_t *cr,
4072 caller_context_t *ct)
4073 {
4074 int err;
4075
4076 VOPXID_MAP_CR(vp, cr);
4077
4078 err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4079 VOPSTATS_UPDATE(vp, pageio);
4080 return (err);
4081 }
4082
4083 int
fop_dumpctl(vnode_t * vp,int action,offset_t * blkp,caller_context_t * ct)4084 fop_dumpctl(
4085 vnode_t *vp,
4086 int action,
4087 offset_t *blkp,
4088 caller_context_t *ct)
4089 {
4090 int err;
4091 err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4092 VOPSTATS_UPDATE(vp, dumpctl);
4093 return (err);
4094 }
4095
4096 void
fop_dispose(vnode_t * vp,page_t * pp,int flag,int dn,cred_t * cr,caller_context_t * ct)4097 fop_dispose(
4098 vnode_t *vp,
4099 page_t *pp,
4100 int flag,
4101 int dn,
4102 cred_t *cr,
4103 caller_context_t *ct)
4104 {
4105 /* Must do stats first since it's possible to lose the vnode */
4106 VOPSTATS_UPDATE(vp, dispose);
4107
4108 VOPXID_MAP_CR(vp, cr);
4109
4110 (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4111 }
4112
4113 int
fop_setsecattr(vnode_t * vp,vsecattr_t * vsap,int flag,cred_t * cr,caller_context_t * ct)4114 fop_setsecattr(
4115 vnode_t *vp,
4116 vsecattr_t *vsap,
4117 int flag,
4118 cred_t *cr,
4119 caller_context_t *ct)
4120 {
4121 int err;
4122
4123 VOPXID_MAP_CR(vp, cr);
4124
4125 /*
4126 * We're only allowed to skip the ACL check iff we used a 32 bit
4127 * ACE mask with VOP_ACCESS() to determine permissions.
4128 */
4129 if ((flag & ATTR_NOACLCHECK) &&
4130 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4131 return (EINVAL);
4132 }
4133 err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4134 VOPSTATS_UPDATE(vp, setsecattr);
4135 return (err);
4136 }
4137
4138 int
fop_getsecattr(vnode_t * vp,vsecattr_t * vsap,int flag,cred_t * cr,caller_context_t * ct)4139 fop_getsecattr(
4140 vnode_t *vp,
4141 vsecattr_t *vsap,
4142 int flag,
4143 cred_t *cr,
4144 caller_context_t *ct)
4145 {
4146 int err;
4147
4148 /*
4149 * We're only allowed to skip the ACL check iff we used a 32 bit
4150 * ACE mask with VOP_ACCESS() to determine permissions.
4151 */
4152 if ((flag & ATTR_NOACLCHECK) &&
4153 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4154 return (EINVAL);
4155 }
4156
4157 VOPXID_MAP_CR(vp, cr);
4158
4159 err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4160 VOPSTATS_UPDATE(vp, getsecattr);
4161 return (err);
4162 }
4163
4164 int
fop_shrlock(vnode_t * vp,int cmd,struct shrlock * shr,int flag,cred_t * cr,caller_context_t * ct)4165 fop_shrlock(
4166 vnode_t *vp,
4167 int cmd,
4168 struct shrlock *shr,
4169 int flag,
4170 cred_t *cr,
4171 caller_context_t *ct)
4172 {
4173 int err;
4174
4175 VOPXID_MAP_CR(vp, cr);
4176
4177 err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4178 VOPSTATS_UPDATE(vp, shrlock);
4179 return (err);
4180 }
4181
4182 int
fop_vnevent(vnode_t * vp,vnevent_t vnevent,vnode_t * dvp,char * fnm,caller_context_t * ct)4183 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4184 caller_context_t *ct)
4185 {
4186 int err;
4187
4188 err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4189 VOPSTATS_UPDATE(vp, vnevent);
4190 return (err);
4191 }
4192
4193 int
fop_reqzcbuf(vnode_t * vp,enum uio_rw ioflag,xuio_t * uiop,cred_t * cr,caller_context_t * ct)4194 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4195 caller_context_t *ct)
4196 {
4197 int err;
4198
4199 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4200 return (ENOTSUP);
4201 err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4202 VOPSTATS_UPDATE(vp, reqzcbuf);
4203 return (err);
4204 }
4205
4206 int
fop_retzcbuf(vnode_t * vp,xuio_t * uiop,cred_t * cr,caller_context_t * ct)4207 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4208 {
4209 int err;
4210
4211 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4212 return (ENOTSUP);
4213 err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4214 VOPSTATS_UPDATE(vp, retzcbuf);
4215 return (err);
4216 }
4217
/*
 * Default VSD destructor: deliberately does nothing with 'value'.
 *
 * It exists because a NULL entry in the destructor table means "key is
 * unused", so a key created without a destructor still needs a non-NULL
 * function to mark its slot as taken.
 */
/* ARGSUSED */
void
vsd_defaultdestructor(void *value)
{
	/* intentionally empty */
}
4226
4227 /*
4228 * Create a key (index into per vnode array)
4229 * Locks out vsd_create, vsd_destroy, and vsd_free
4230 * May allocate memory with lock held
4231 */
4232 void
vsd_create(uint_t * keyp,void (* destructor)(void *))4233 vsd_create(uint_t *keyp, void (*destructor)(void *))
4234 {
4235 int i;
4236 uint_t nkeys;
4237
4238 /*
4239 * if key is allocated, do nothing
4240 */
4241 mutex_enter(&vsd_lock);
4242 if (*keyp) {
4243 mutex_exit(&vsd_lock);
4244 return;
4245 }
4246 /*
4247 * find an unused key
4248 */
4249 if (destructor == NULL)
4250 destructor = vsd_defaultdestructor;
4251
4252 for (i = 0; i < vsd_nkeys; ++i)
4253 if (vsd_destructor[i] == NULL)
4254 break;
4255
4256 /*
4257 * if no unused keys, increase the size of the destructor array
4258 */
4259 if (i == vsd_nkeys) {
4260 if ((nkeys = (vsd_nkeys << 1)) == 0)
4261 nkeys = 1;
4262 vsd_destructor =
4263 (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4264 (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4265 (size_t)(nkeys * sizeof (void (*)(void *))));
4266 vsd_nkeys = nkeys;
4267 }
4268
4269 /*
4270 * allocate the next available unused key
4271 */
4272 vsd_destructor[i] = destructor;
4273 *keyp = i + 1;
4274
4275 /* create vsd_list, if it doesn't exist */
4276 if (vsd_list == NULL) {
4277 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4278 list_create(vsd_list, sizeof (struct vsd_node),
4279 offsetof(struct vsd_node, vs_nodes));
4280 }
4281
4282 mutex_exit(&vsd_lock);
4283 }
4284
4285 /*
4286 * Destroy a key
4287 *
4288 * Assumes that the caller is preventing vsd_set and vsd_get
4289 * Locks out vsd_create, vsd_destroy, and vsd_free
4290 * May free memory with lock held
4291 */
4292 void
vsd_destroy(uint_t * keyp)4293 vsd_destroy(uint_t *keyp)
4294 {
4295 uint_t key;
4296 struct vsd_node *vsd;
4297
4298 /*
4299 * protect the key namespace and our destructor lists
4300 */
4301 mutex_enter(&vsd_lock);
4302 key = *keyp;
4303 *keyp = 0;
4304
4305 ASSERT(key <= vsd_nkeys);
4306
4307 /*
4308 * if the key is valid
4309 */
4310 if (key != 0) {
4311 uint_t k = key - 1;
4312 /*
4313 * for every vnode with VSD, call key's destructor
4314 */
4315 for (vsd = list_head(vsd_list); vsd != NULL;
4316 vsd = list_next(vsd_list, vsd)) {
4317 /*
4318 * no VSD for key in this vnode
4319 */
4320 if (key > vsd->vs_nkeys)
4321 continue;
4322 /*
4323 * call destructor for key
4324 */
4325 if (vsd->vs_value[k] && vsd_destructor[k])
4326 (*vsd_destructor[k])(vsd->vs_value[k]);
4327 /*
4328 * reset value for key
4329 */
4330 vsd->vs_value[k] = NULL;
4331 }
4332 /*
4333 * actually free the key (NULL destructor == unused)
4334 */
4335 vsd_destructor[k] = NULL;
4336 }
4337
4338 mutex_exit(&vsd_lock);
4339 }
4340
4341 /*
4342 * Quickly return the per vnode value that was stored with the specified key
4343 * Assumes the caller is protecting key from vsd_create and vsd_destroy
4344 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4345 */
4346 void *
vsd_get(vnode_t * vp,uint_t key)4347 vsd_get(vnode_t *vp, uint_t key)
4348 {
4349 struct vsd_node *vsd;
4350
4351 ASSERT(vp != NULL);
4352 ASSERT(mutex_owned(&vp->v_vsd_lock));
4353
4354 vsd = vp->v_vsd;
4355
4356 if (key && vsd != NULL && key <= vsd->vs_nkeys)
4357 return (vsd->vs_value[key - 1]);
4358 return (NULL);
4359 }
4360
4361 /*
4362 * Set a per vnode value indexed with the specified key
4363 * Assumes the caller is holding v_vsd_lock to protect the vsd.
4364 */
4365 int
vsd_set(vnode_t * vp,uint_t key,void * value)4366 vsd_set(vnode_t *vp, uint_t key, void *value)
4367 {
4368 struct vsd_node *vsd;
4369
4370 ASSERT(vp != NULL);
4371 ASSERT(mutex_owned(&vp->v_vsd_lock));
4372
4373 if (key == 0)
4374 return (EINVAL);
4375
4376 vsd = vp->v_vsd;
4377 if (vsd == NULL)
4378 vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4379
4380 /*
4381 * If the vsd was just allocated, vs_nkeys will be 0, so the following
4382 * code won't happen and we will continue down and allocate space for
4383 * the vs_value array.
4384 * If the caller is replacing one value with another, then it is up
4385 * to the caller to free/rele/destroy the previous value (if needed).
4386 */
4387 if (key <= vsd->vs_nkeys) {
4388 vsd->vs_value[key - 1] = value;
4389 return (0);
4390 }
4391
4392 ASSERT(key <= vsd_nkeys);
4393
4394 if (vsd->vs_nkeys == 0) {
4395 mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4396 /*
4397 * Link onto list of all VSD nodes.
4398 */
4399 list_insert_head(vsd_list, vsd);
4400 mutex_exit(&vsd_lock);
4401 }
4402
4403 /*
4404 * Allocate vnode local storage and set the value for key
4405 */
4406 vsd->vs_value = vsd_realloc(vsd->vs_value,
4407 vsd->vs_nkeys * sizeof (void *),
4408 key * sizeof (void *));
4409 vsd->vs_nkeys = key;
4410 vsd->vs_value[key - 1] = value;
4411
4412 return (0);
4413 }
4414
4415 /*
4416 * Called from vn_free() to run the destructor function for each vsd
4417 * Locks out vsd_create and vsd_destroy
4418 * Assumes that the destructor *DOES NOT* use vsd
4419 */
4420 void
vsd_free(vnode_t * vp)4421 vsd_free(vnode_t *vp)
4422 {
4423 int i;
4424 struct vsd_node *vsd = vp->v_vsd;
4425
4426 if (vsd == NULL)
4427 return;
4428
4429 if (vsd->vs_nkeys == 0) {
4430 kmem_free(vsd, sizeof (*vsd));
4431 vp->v_vsd = NULL;
4432 return;
4433 }
4434
4435 /*
4436 * lock out vsd_create and vsd_destroy, call
4437 * the destructor, and mark the value as destroyed.
4438 */
4439 mutex_enter(&vsd_lock);
4440
4441 for (i = 0; i < vsd->vs_nkeys; i++) {
4442 if (vsd->vs_value[i] && vsd_destructor[i])
4443 (*vsd_destructor[i])(vsd->vs_value[i]);
4444 vsd->vs_value[i] = NULL;
4445 }
4446
4447 /*
4448 * remove from linked list of VSD nodes
4449 */
4450 list_remove(vsd_list, vsd);
4451
4452 mutex_exit(&vsd_lock);
4453
4454 /*
4455 * free up the VSD
4456 */
4457 kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4458 kmem_free(vsd, sizeof (struct vsd_node));
4459 vp->v_vsd = NULL;
4460 }
4461
4462 /*
4463 * realloc
4464 */
4465 static void *
vsd_realloc(void * old,size_t osize,size_t nsize)4466 vsd_realloc(void *old, size_t osize, size_t nsize)
4467 {
4468 void *new;
4469
4470 new = kmem_zalloc(nsize, KM_SLEEP);
4471 if (old) {
4472 bcopy(old, new, osize);
4473 kmem_free(old, osize);
4474 }
4475 return (new);
4476 }
4477
4478 /*
4479 * Setup the extensible system attribute for creating a reparse point.
4480 * The symlink data 'target' is validated for proper format of a reparse
4481 * string and a check also made to make sure the symlink data does not
4482 * point to an existing file.
4483 *
4484 * return 0 if ok else -1.
4485 */
4486 static int
fs_reparse_mark(char * target,vattr_t * vap,xvattr_t * xvattr)4487 fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4488 {
4489 xoptattr_t *xoap;
4490
4491 if ((!target) || (!vap) || (!xvattr))
4492 return (-1);
4493
4494 /* validate reparse string */
4495 if (reparse_validate((const char *)target))
4496 return (-1);
4497
4498 xva_init(xvattr);
4499 xvattr->xva_vattr = *vap;
4500 xvattr->xva_vattr.va_mask |= AT_XVATTR;
4501 xoap = xva_getxoptattr(xvattr);
4502 ASSERT(xoap);
4503 XVA_SET_REQ(xvattr, XAT_REPARSE);
4504 xoap->xoa_reparse = 1;
4505
4506 return (0);
4507 }
4508
4509 /*
4510 * Function to check whether a symlink is a reparse point.
4511 * Return B_TRUE if it is a reparse point, else return B_FALSE
4512 */
4513 boolean_t
vn_is_reparse(vnode_t * vp,cred_t * cr,caller_context_t * ct)4514 vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4515 {
4516 xvattr_t xvattr;
4517 xoptattr_t *xoap;
4518
4519 if ((vp->v_type != VLNK) ||
4520 !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4521 return (B_FALSE);
4522
4523 xva_init(&xvattr);
4524 xoap = xva_getxoptattr(&xvattr);
4525 ASSERT(xoap);
4526 XVA_SET_REQ(&xvattr, XAT_REPARSE);
4527
4528 if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
4529 return (B_FALSE);
4530
4531 if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4532 (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4533 return (B_FALSE);
4534
4535 return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
4536 }
4537