xref: /openbsd-src/share/man/man9/vnode.9 (revision daf88648c0e349d5c02e1504293082072c981640)
1.\"     $OpenBSD: vnode.9,v 1.22 2005/10/19 16:52:19 pedro Exp $
2.\"
3.\" Copyright (c) 2001 Constantine Sapuntzakis
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\"
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. The name of the author may not be used to endorse or promote products
13.\"    derived from this software without specific prior written permission.
14.\"
15.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
16.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
17.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
18.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19.\" EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25.\"
26.Dd September 16, 2004
27.Dt VNODE 9
28.Os
29.Sh NAME
30.Nm vnode
31.Nd an overview of vnodes
32.Sh DESCRIPTION
33A
34.Em vnode
35is an object in kernel memory that speaks the
36.Ux
37file interface (open, read, write, close, readdir, etc.).
38Vnodes can represent files, directories, FIFOs, domain sockets, block devices,
39and character devices.
40.Pp
41Each vnode has a set of methods which start with the string
42.Dq VOP_ .
43These methods include
44.Fn VOP_OPEN ,
45.Fn VOP_READ ,
46.Fn VOP_WRITE ,
47.Fn VOP_RENAME ,
48.Fn VOP_CLOSE ,
49and
50.Fn VOP_MKDIR .
51Many of these methods correspond closely to the equivalent
52file system call \-
53.Xr open 2 ,
54.Xr read 2 ,
55.Xr write 2 ,
56.Xr rename 2 ,
57etc.
58Each file system (FFS, NFS, etc.) provides implementations for these methods.
59.Pp
60The Virtual File System library (see
61.Xr vfs 9 )
62maintains a pool of vnodes.
63File systems cannot allocate their own vnodes; they must use the functions
64provided by the VFS to create and manage vnodes.
65.Pp
66The definition of a vnode is as follows:
67.Bd -literal
68struct vnode {
69	struct uvm_vnode v_uvm;		/* uvm(9) data */
70	int	(**v_op)(void *);	/* vnode operations vector */
71	enum	vtype v_type;		/* vnode type */
72	u_int	v_flag;			/* vnode flags (see below) */
73	u_int	v_usecount;		/* reference count of users */
74	u_int	v_writecount;		/* reference count of writers */
75	/* Flags that can be read/written in interrupts */
76	u_int	v_bioflag;		/* flags used by intr handlers */
77	u_int	v_holdcnt;		/* buffer references */
78	u_int	v_id;			/* capability identifier */
79	struct	mount *v_mount;		/* ptr to vfs we are in */
80	TAILQ_ENTRY(vnode) v_freelist;	/* vnode freelist */
81	LIST_ENTRY(vnode) v_mntvnodes;	/* vnodes for mount point */
82	struct	buflists v_cleanblkhd;	/* clean blocklist head */
83	struct	buflists v_dirtyblkhd;	/* dirty blocklist head */
84	u_int	v_numoutput;		/* num of writes in progress */
85	LIST_ENTRY(vnode) v_synclist;	/* vnode with dirty buffers */
86	union {
87	  struct mount    *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
88	  struct socket   *vu_socket;	/* UNIX IPC (VSOCK) */
89	  struct specinfo *vu_specinfo;	/* device (VCHR, VBLK) */
90	  struct fifoinfo *vu_fifoinfo;	/* fifo (VFIFO) */
91	} v_un;
92
93	struct	simplelock v_interlock;	/* lock on usecount and flag */
94	enum	vtagtype v_tag;		/* type of underlying data */
95	void	*v_data;		/* private data for fs */
96	struct {
97	  struct simplelock vsi_lock;	/* lock to protect below */
98	  struct selinfo vsi_selinfo;	/* identity of poller(s) */
99	} v_selectinfo;
100};
101#define v_mountedhere	v_un.vu_mountedhere
102#define v_socket	v_un.vu_socket
103#define v_specinfo	v_un.vu_specinfo
104#define v_fifoinfo	v_un.vu_fifoinfo
105.Ed
106.Ss Vnode life cycle
107When a client of the VFS requests a new vnode, the vnode allocation
108code can reuse an old vnode object that is no longer in use.
109Whether a vnode is in use is tracked by the vnode reference count
110.Pq Va v_usecount .
111By convention, each open file handle holds a reference
112as do VM objects backed by files.
113A vnode with a reference count of 1 or more will not be deallocated or
114reused to point to a different file.
115So, if you want to ensure that your vnode doesn't become a different
116file under you, you had better be sure you have a reference to it.
117A vnode that points to a valid file and has a reference count of 1 or more
118is called
119.Em active .
120.Pp
121When a vnode's reference count drops to zero, it becomes
122.Em inactive ,
123that is, a candidate for reuse.
124An inactive vnode still refers to a valid file and one can try to
125reactivate it using
126.Xr vget 9
127(this is used a lot by caches).
128.Pp
129Before the VFS can reuse an inactive vnode to refer to another file,
130it must clean all information pertaining to the old file.
131A cleaned out vnode is called a
132.Em reclaimed
133vnode.
134.Pp
135To support forcible unmounts and the
136.Xr revoke 2
137system call, the VFS may reclaim a vnode with a positive reference
138count.
139The reclaimed vnode is given to the dead file system, which
140returns errors for most operations.
141The reclaimed vnode will not be
142reused for another file until its reference count hits zero.
143.Ss Vnode pool
144The
145.Xr getnewvnode 9
146call allocates a vnode from the pool, possibly reusing an
147inactive vnode, and returns it to the caller.
148The vnode returned has a reference count
149.Pq Va v_usecount
150of 1.
151.Pp
152The
153.Xr vref 9
154call increments the reference count on the vnode.
155It may only be called on a vnode with a reference count of 1 or greater.
156The
157.Xr vrele 9
158and
159.Xr vput 9
160calls decrement the reference count.
161In addition, the
162.Xr vput 9
163call also releases the vnode lock.
164.Pp
165The
166.Xr vget 9
167call, when used on an inactive vnode, will make the vnode active
168by bumping the reference count to one.
169When called on an active vnode,
170.Fn vget
171increases the reference count by one.
172However, if the vnode is being reclaimed concurrently, then
173.Fn vget
174will fail and return an error.
175.Pp
176The
177.Xr vgone 9
178and
179.Xr vgonel 9
180calls
181orchestrate the reclamation of a vnode.
182They can be called on both active and inactive vnodes.
183.Pp
184When transitioning a vnode to the reclaimed state, the VFS will call the
185.Xr VOP_RECLAIM 9
186method.
187File systems use this method to free any file-system-specific data
188they attached to the vnode.
189.Ss Vnode locks
190The vnode actually has three different types of lock: the vnode lock,
191the vnode interlock, and the vnode reclamation lock
192.Pq Dv VXLOCK .
193.Ss The vnode lock
194The vnode lock and its consistent use accomplish the following:
195.Bl -bullet
196.It
197It keeps a locked vnode from changing across certain pairs of VOP_ calls,
198thus preserving cached data.
199For example, it keeps the directory from
200changing between a
201.Xr VOP_LOOKUP 9
202call and a
203.Xr VOP_CREATE 9 .
204The
205.Fn VOP_LOOKUP
206call makes sure the name doesn't already exist in the
207directory and finds free room in the directory for the new entry.
208The
209.Fn VOP_CREATE
210call can then go ahead and create the file without checking if
211it already exists or looking for free space.
212.It
213Some file systems rely on it to ensure that only one
214.Dq thread
215at a time
216is calling VOP_ vnode operations on a given file or directory.
217Otherwise, the file system's behavior is undefined.
218.It
219On rare occasions, code will hold the vnode lock so that a series of
220VOP_ operations occurs as an atomic unit.
221(Of course, this doesn't work with network file systems like NFSv2 that don't
222have any notion of bundling a bunch of operations into an atomic unit.)
223.It
224While the vnode lock is held, the vnode will not be reclaimed.
225.El
226.Pp
227There is a discipline to using the vnode lock.
228Some VOP_ operations require that the vnode lock is held before being called.
229A description of this rather arcane locking discipline is in
230.Pa sys/kern/vnode_if.src .
231.Pp
232The vnode lock is acquired by calling
233.Xr vn_lock 9
234and released by calling
235.Xr VOP_UNLOCK 9 .
236.Pp
237A process is allowed to sleep while holding the vnode lock.
238.Pp
239The implementation of the vnode lock is the responsibility of the individual
240file systems.
241Not all file systems implement it.
242.Pp
243To prevent deadlocks, when acquiring locks on multiple vnodes, the lock
244of the parent directory must be acquired before the lock on the child directory.
245.Ss Vnode interlock
246The vnode interlock
247.Pq Va v_interlock
248is a simplelock (see
249.Xr simple_lock 9 ) .
250It is useful on multi-processor systems for acquiring a quick exclusive
251lock on the contents of the vnode.
252It MUST NOT be held while sleeping.
253.Pp
254This field protects the
255.Va v_flag , v_writecount , v_usecount ,
256and
257.Va v_holdcnt
258fields from concurrent access.
259See
260.Xr lock 9
261for more details on lock synchronization in interrupt context.
262.\" Other splbio/interrupt issues?
263.Pp
264Operations on this lock are a no-op on uniprocessor systems.
265.Ss Other vnode synchronization
266The vnode reclamation lock
267.Pq Dv VXLOCK
268is used to prevent multiple
269processes from entering the vnode reclamation code.
270It is also used as a flag to indicate that reclamation is in progress.
271The
272.Dv VXWANT
273flag is set by processes that wish to be woken up when reclamation
274is finished.
275.Pp
276The
277.Xr vwaitforio 9
278call is used to wait for all outstanding write I/Os associated with a
279vnode to complete.
280.Ss Version number/capability
281The vnode capability,
282.Va v_id ,
283is a 32-bit version number on the vnode.
284Every time a vnode is reassigned to a new file, the vnode capability
285is changed.
286This is used by code that wishes to keep pointers to vnodes but doesn't want
287to hold a reference (e.g., caches).
288The code keeps both a vnode pointer and a copy of the capability.
289The code can later compare the vnode's capability to its copy and see
290if the vnode still points to the same file.
291.Pp
292Note: for this to work, memory assigned to hold a
293.Vt struct vnode
294can
295only be used for another purpose when all pointers to it have disappeared.
296Since the vnode pool has no way of knowing when all pointers have
297disappeared, it never frees memory it has allocated for vnodes.
298.Ss Vnode fields
299Most of the fields of the vnode structure should be treated as opaque
300and only manipulated through the proper APIs.
301This section describes the fields that are manipulated directly.
302.Pp
303The
304.Va v_flag
305attribute contains random flags related to various functions.
306They are summarized in the following table:
307.Pp
308.Bl -tag -width 10n -compact -offset indent
309.It Dv VROOT
310This vnode is the root of its file system.
311.It Dv VTEXT
312This vnode is a pure text prototype.
313.It Dv VSYSTEM
314This vnode is being used by the kernel.
315.It Dv VISTTY
316This vnode represents a
317.Xr tty 4 .
318.It Dv VXLOCK
319This vnode is locked to change its underlying type.
320.It Dv VXWANT
321A process is waiting for this vnode.
322.It Dv VALIASED
323This vnode has an alias.
324.It Dv VLAYER
325This vnode is on a layered file system.
326.It Dv VLOCKSWORK
327This vnode's underlying file system supports the locking discipline.
328.El
329.Pp
330The
331.Va v_tag
332attribute indicates what file system the vnode belongs to.
333Very little code actually uses this attribute and its use is deprecated.
334Programmers should seriously consider using more object-oriented approaches
335(e.g. function tables).
336There is no safe way of defining new
337.Va v_tag Ns 's
338for loadable file systems.
339The
340.Va v_tag
341attribute is read-only.
342.Pp
343The
344.Va v_type
345attribute indicates what type of file (e.g. directory,
346regular, FIFO) this vnode is.
347This is used by the generic code for various checks.
348For example, the
349.Xr read 2
350system call returns an error when a read is attempted on a directory.
351.Pp
352Possible types are:
353.Pp
354.Bl -tag -width 10n -offset indent -compact
355.It Dv VNON
356This vnode has no type.
357.It Dv VREG
358This vnode represents a regular file.
359.It Dv VDIR
360This vnode represents a directory.
361.It Dv VBLK
362This vnode represents a block device.
363.It Dv VCHR
364This vnode represents a character device.
365.It Dv VLNK
366This vnode represents a symbolic link.
367.It Dv VSOCK
368This vnode represents a socket.
369.It Dv VFIFO
370This vnode represents a named pipe.
371.It Dv VBAD
372This vnode represents a bad or dead file.
373.El
374.Pp
375The
376.Va v_data
377attribute allows a file system to attach a piece of file
378system specific memory to the vnode.
379This contains information about the file that is specific to
380the file system (such as an inode pointer in the case of FFS).
381.Pp
382The
383.Va v_numoutput
384attribute indicates the number of pending synchronous
385and asynchronous writes on the vnode.
386It does not track the number of dirty buffers attached to the vnode.
387The attribute is used by code like
388.Xr fsync 2
389to wait for all writes
390to complete before returning to the user.
391This attribute must be manipulated at
392.Xr splbio 9 .
393.Pp
394The
395.Va v_writecount
396attribute tracks the number of write calls pending
397on the vnode.
398.Ss Rules
399The vast majority of vnode functions may not be called from interrupt
400context.
401The exceptions are
402.Fn bgetvp
403and
404.Fn brelvp .
405The following fields of the vnode are manipulated at interrupt level:
406.Va v_numoutput , v_holdcnt , v_dirtyblkhd ,
407.Va v_cleanblkhd , v_bioflag , v_freelist ,
408and
409.Va v_synclist .
410Any access to these fields should be protected by
411.Xr splbio 9 .
412.Sh SEE ALSO
413.Xr uvm 9 ,
414.Xr vaccess 9 ,
415.Xr vclean 9 ,
416.Xr vcount 9 ,
417.Xr vdevgone 9 ,
418.Xr vfinddev 9 ,
419.Xr vflush 9 ,
420.Xr vflushbuf 9 ,
421.Xr vfs 9 ,
422.Xr vget 9 ,
423.Xr vgone 9 ,
424.Xr vhold 9 ,
425.Xr vinvalbuf 9 ,
426.Xr vn_lock 9 ,
427.Xr VOP_LOOKUP 9 ,
428.Xr vput 9 ,
429.Xr vrecycle 9 ,
430.Xr vref 9 ,
431.Xr vrele 9 ,
432.Xr vwaitforio 9 ,
433.Xr vwakeup 9
434.Sh HISTORY
435This document first appeared in
436.Ox 2.9 .
437