xref: /openbsd-src/share/man/man9/vnode.9 (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1.\"     $OpenBSD: vnode.9,v 1.27 2008/04/12 10:18:01 jmc Exp $
2.\"
3.\" Copyright (c) 2001 Constantine Sapuntzakis
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\"
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. The name of the author may not be used to endorse or promote products
13.\"    derived from this software without specific prior written permission.
14.\"
15.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
16.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
17.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
18.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19.\" EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25.\"
26.Dd $Mdocdate: April 12 2008 $
27.Dt VNODE 9
28.Os
29.Sh NAME
30.Nm vnode
31.Nd an overview of vnodes
32.Sh DESCRIPTION
33A
34.Em vnode
35is an object in kernel memory that speaks the
36.Ux
37file interface (open, read, write, close, readdir, etc.).
38Vnodes can represent files, directories, FIFOs, domain sockets, block devices,
39and character devices.
40.Pp
41Each vnode has a set of methods which start with the string
42.Dq VOP_ .
43These methods include
44.Fn VOP_OPEN ,
45.Fn VOP_READ ,
46.Fn VOP_WRITE ,
47.Fn VOP_RENAME ,
48.Fn VOP_CLOSE ,
49and
50.Fn VOP_MKDIR .
51Many of these methods correspond closely to the equivalent
52file system call \-
53.Xr open 2 ,
54.Xr read 2 ,
55.Xr write 2 ,
56.Xr rename 2 ,
57etc.
58Each file system (FFS, NFS, etc.) provides implementations for these methods.
59.Pp
60The Virtual File System library (see
61.Xr vfs 9 )
62maintains a pool of vnodes.
63File systems cannot allocate their own vnodes; they must use the functions
64provided by the VFS to create and manage vnodes.
65.Pp
66The definition of a vnode is as follows:
67.Bd -literal
68struct vnode {
69	struct uvm_vnode v_uvm;		/* uvm(9) data */
70	int	(**v_op)(void *);	/* vnode operations vector */
71	enum	vtype v_type;		/* vnode type */
72	u_int	v_flag;			/* vnode flags (see below) */
73	u_int	v_usecount;		/* reference count of users */
74	u_int	v_writecount;		/* reference count of writers */
75	/* Flags that can be read/written in interrupts */
76	u_int	v_bioflag;		/* flags used by intr handlers */
77	u_int	v_holdcnt;		/* buffer references */
78	u_int	v_id;			/* capability identifier */
79	struct	mount *v_mount;		/* ptr to vfs we are in */
80	TAILQ_ENTRY(vnode) v_freelist;	/* vnode freelist */
81	LIST_ENTRY(vnode) v_mntvnodes;	/* vnodes for mount point */
82	struct	buflists v_cleanblkhd;	/* clean blocklist head */
83	struct	buflists v_dirtyblkhd;	/* dirty blocklist head */
84	u_int	v_numoutput;		/* num of writes in progress */
85	LIST_ENTRY(vnode) v_synclist;	/* vnode with dirty buffers */
86	union {
87	  struct mount    *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
88	  struct socket   *vu_socket;	/* UNIX IPC (VSOCK) */
89	  struct specinfo *vu_specinfo;	/* device (VCHR, VBLK) */
90	  struct fifoinfo *vu_fifoinfo;	/* fifo (VFIFO) */
91	} v_un;
92
93	enum	vtagtype v_tag;		/* type of underlying data */
94	void	*v_data;		/* private data for fs */
95	struct {
96	  struct simplelock vsi_lock;	/* lock to protect below */
97	  struct selinfo vsi_selinfo;	/* identity of poller(s) */
98	} v_selectinfo;
99};
100#define v_mountedhere	v_un.vu_mountedhere
101#define v_socket	v_un.vu_socket
102#define v_specinfo	v_un.vu_specinfo
103#define v_fifoinfo	v_un.vu_fifoinfo
104.Ed
105.Ss Vnode life cycle
106When a client of the VFS requests a new vnode, the vnode allocation
107code can reuse an old vnode object that is no longer in use.
108Whether a vnode is in use is tracked by the vnode reference count
109.Pq Va v_usecount .
110By convention, each open file handle holds a reference
111as do VM objects backed by files.
112A vnode with a reference count of 1 or more will not be deallocated or
113reused to point to a different file.
114So, if you want to ensure that your vnode doesn't become a different
115file under you, you had better be sure you have a reference to it.
116A vnode that points to a valid file and has a reference count of 1 or more
117is called
118.Em active .
119.Pp
120When a vnode's reference count drops to zero, it becomes
121.Em inactive ,
122that is, a candidate for reuse.
123An inactive vnode still refers to a valid file and one can try to
124reactivate it using
125.Xr vget 9
126(this is used a lot by caches).
127.Pp
128Before the VFS can reuse an inactive vnode to refer to another file,
129it must clean all information pertaining to the old file.
130A cleaned out vnode is called a
131.Em reclaimed
132vnode.
133.Pp
134To support forcible unmounts and the
135.Xr revoke 2
136system call, the VFS may reclaim a vnode with a positive reference
137count.
138The reclaimed vnode is given to the dead file system, which
139returns errors for most operations.
140The reclaimed vnode will not be
141reused for another file until its reference count hits zero.
142.Ss Vnode pool
143The
144.Xr getnewvnode 9
145call allocates a vnode from the pool, possibly reusing an
146inactive vnode, and returns it to the caller.
147The vnode returned has a reference count
148.Pq Va v_usecount
149of 1.
150.Pp
151The
152.Xr vref 9
153call increments the reference count on the vnode.
154It may only be called on a vnode with a reference count of 1 or greater.
155The
156.Xr vrele 9
157and
158.Xr vput 9
159calls decrement the reference count.
160In addition, the
161.Xr vput 9
162call also releases the vnode lock.
163.Pp
164The
165.Xr vget 9
166call, when used on an inactive vnode, will make the vnode active
167by bumping the reference count to one.
168When called on an active vnode,
169.Fn vget
170increases the reference count by one.
171However, if the vnode is being reclaimed concurrently, then
172.Fn vget
173will fail and return an error.
174.Pp
175The
176.Xr vgone 9
177and
178.Xr vgonel 9
179calls
180orchestrate the reclamation of a vnode.
181They can be called on both active and inactive vnodes.
182.Pp
183When transitioning a vnode to the reclaimed state, the VFS will call the
184.Xr VOP_RECLAIM 9
185method.
186File systems use this method to free any file-system-specific data
187they attached to the vnode.
188.Ss Vnode locks
189The vnode actually has two different types of locks: the vnode lock
190and the vnode reclamation lock
191.Pq Dv VXLOCK .
192.Ss The vnode lock
193The vnode lock and its consistent use accomplish the following:
194.Bl -bullet
195.It
196It keeps a locked vnode from changing across certain pairs of VOP_ calls,
197thus preserving cached data.
198For example, it keeps the directory from
199changing between a
200.Xr VOP_LOOKUP 9
201call and a
202.Xr VOP_CREATE 9 .
203The
204.Fn VOP_LOOKUP
205call makes sure the name doesn't already exist in the
206directory and finds free room in the directory for the new entry.
207The
208.Fn VOP_CREATE
209call can then go ahead and create the file without checking if
210it already exists or looking for free space.
211.It
212Some file systems rely on it to ensure that only one
213.Dq thread
214at a time
215is calling VOP_ vnode operations on a given file or directory.
216Otherwise, the file system's behavior is undefined.
217.It
218On rare occasions, code will hold the vnode lock so that a series of
219VOP_ operations occurs as an atomic unit.
220(Of course, this doesn't work with network file systems like NFSv2 that don't
221have any notion of bundling a bunch of operations into an atomic unit.)
222.It
223While the vnode lock is held, the vnode will not be reclaimed.
224.El
225.Pp
226There is a discipline to using the vnode lock.
227Some VOP_ operations require that the vnode lock is held before being called.
228A description of this rather arcane locking discipline is in
229.Pa sys/kern/vnode_if.src .
230.Pp
231The vnode lock is acquired by calling
232.Xr vn_lock 9
233and released by calling
234.Xr VOP_UNLOCK 9 .
235.Pp
236A process is allowed to sleep while holding the vnode lock.
237.Pp
238The implementation of the vnode lock is the responsibility of the individual
239file systems.
240Not all file systems implement it.
241.Pp
242To prevent deadlocks, when acquiring locks on multiple vnodes, the lock
243of the parent directory must be acquired before the lock on the child directory.
244.Ss Other vnode synchronization
245The vnode reclamation lock
246.Pq Dv VXLOCK
247is used to prevent multiple
248processes from entering the vnode reclamation code.
249It is also used as a flag to indicate that reclamation is in progress.
250The
251.Dv VXWANT
252flag is set by processes that wish to be woken up when reclamation
253is finished.
254.Pp
255The
256.Xr vwaitforio 9
257call is used to wait for all outstanding write I/Os associated with a
258vnode to complete.
259.Ss Version number/capability
260The vnode capability,
261.Va v_id ,
262is a 32-bit version number on the vnode.
263Every time a vnode is reassigned to a new file, the vnode capability
264is changed.
265This is used by code that wishes to keep pointers to vnodes but doesn't want
266to hold a reference (e.g., caches).
267The code keeps both a vnode pointer and a copy of the capability.
268The code can later compare the vnode's capability to its copy and see
269if the vnode still points to the same file.
270.Pp
271Note: for this to work, memory assigned to hold a
272.Vt struct vnode
273can
274only be used for another purpose when all pointers to it have disappeared.
275Since the vnode pool has no way of knowing when all pointers have
276disappeared, it never frees memory it has allocated for vnodes.
277.Ss Vnode fields
278Most of the fields of the vnode structure should be treated as opaque
279and only manipulated through the proper APIs.
280This section describes the fields that are manipulated directly.
281.Pp
282The
283.Va v_flag
284attribute contains miscellaneous flags related to various functions.
285They are summarized in the following table:
286.Pp
287.Bl -tag -width 10n -compact -offset indent
288.It Dv VROOT
289This vnode is the root of its file system.
290.It Dv VTEXT
291This vnode is a pure text prototype.
292.It Dv VSYSTEM
293This vnode is being used by the kernel.
294.It Dv VISTTY
295This vnode represents a
296.Xr tty 4 .
297.It Dv VXLOCK
298This vnode is locked to change its underlying type.
299.It Dv VXWANT
300A process is waiting for this vnode.
301.It Dv VALIASED
302This vnode has an alias.
303.It Dv VLOCKSWORK
304This vnode's underlying file system supports locking discipline.
305.El
306.Pp
307The
308.Va v_tag
309attribute indicates what file system the vnode belongs to.
310Very little code actually uses this attribute and its use is deprecated.
311Programmers should seriously consider using more object-oriented approaches
312(e.g. function tables).
313There is no safe way of defining new
314.Va v_tag Ns 's
315for loadable file systems.
316The
317.Va v_tag
318attribute is read-only.
319.Pp
320The
321.Va v_type
322attribute indicates what type of file (e.g. directory,
323regular, FIFO) this vnode is.
324This is used by the generic code for various checks.
325For example, the
326.Xr read 2
327system call returns zero when a read is attempted on a directory.
328.Pp
329Possible types are:
330.Pp
331.Bl -tag -width 10n -offset indent -compact
332.It Dv VNON
333This vnode has no type.
334.It Dv VREG
335This vnode represents a regular file.
336.It Dv VDIR
337This vnode represents a directory.
338.It Dv VBLK
339This vnode represents a block device.
340.It Dv VCHR
341This vnode represents a character device.
342.It Dv VLNK
343This vnode represents a symbolic link.
344.It Dv VSOCK
345This vnode represents a socket.
346.It Dv VFIFO
347This vnode represents a named pipe.
348.It Dv VBAD
349This vnode represents a bad or dead file.
350.El
351.Pp
352The
353.Va v_data
354attribute allows a file system to attach a piece of file
355system specific memory to the vnode.
356This contains information about the file that is specific to
357the file system (such as an inode pointer in the case of FFS).
358.Pp
359The
360.Va v_numoutput
361attribute indicates the number of pending synchronous
362and asynchronous writes on the vnode.
363It does not track the number of dirty buffers attached to the vnode.
364The attribute is used by code like
365.Xr fsync 2
366to wait for all writes
367to complete before returning to the user.
368This attribute must be manipulated at
369.Xr splbio 9 .
370.Pp
371The
372.Va v_writecount
373attribute tracks the number of write calls pending
374on the vnode.
375.Ss Rules
376The vast majority of vnode functions may not be called from interrupt
377context.
378The exceptions are
379.Fn bgetvp
380and
381.Fn brelvp .
382The following fields of the vnode are manipulated at interrupt level:
383.Va v_numoutput , v_holdcnt , v_dirtyblkhd ,
384.Va v_cleanblkhd , v_bioflag , v_freelist ,
385and
386.Va v_synclist .
387Any access to these fields should be protected by
388.Xr splbio 9 .
389.Sh SEE ALSO
390.Xr uvm 9 ,
391.Xr vaccess 9 ,
392.Xr vclean 9 ,
393.Xr vcount 9 ,
394.Xr vdevgone 9 ,
395.Xr vfinddev 9 ,
396.Xr vflush 9 ,
397.Xr vflushbuf 9 ,
398.Xr vfs 9 ,
399.Xr vget 9 ,
400.Xr vgone 9 ,
401.Xr vhold 9 ,
402.Xr vinvalbuf 9 ,
403.Xr vn_lock 9 ,
404.Xr VOP_LOOKUP 9 ,
405.Xr vput 9 ,
406.Xr vrecycle 9 ,
407.Xr vref 9 ,
408.Xr vrele 9 ,
409.Xr vwaitforio 9 ,
410.Xr vwakeup 9
411.Sh HISTORY
412This document first appeared in
413.Ox 2.9 .
414