xref: /netbsd-src/share/man/man9/disk.9 (revision 466a16a118933bd295a8a104f095714fadf9cf68)
1.\"	$NetBSD: disk.9,v 1.27 2008/05/03 09:43:40 plunky Exp $
2.\"
3.\" Copyright (c) 1995, 1996 Jason R. Thorpe.
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. All advertising materials mentioning features or use of this software
15.\"    must display the following acknowledgement:
16.\"	This product includes software developed for the NetBSD Project
17.\"	by Jason R. Thorpe.
18.\" 4. The name of the author may not be used to endorse or promote products
19.\"    derived from this software without specific prior written permission.
20.\"
21.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE.
32.\"
33.Dd May 3, 2008
34.Dt DISK 9
35.Os
36.Sh NAME
37.Nm disk ,
38.Nm disk_init ,
39.Nm disk_attach ,
40.Nm disk_detach ,
41.Nm disk_destroy ,
42.Nm disk_busy ,
43.Nm disk_unbusy ,
44.Nm disk_find ,
45.Nm disk_blocksize
46.Nd generic disk framework
47.Sh SYNOPSIS
48.In sys/types.h
49.In sys/disklabel.h
50.In sys/disk.h
51.Ft void
52.Fn disk_init "struct disk *" "const char *name" "const struct dkdriver *driver"
53.Ft void
54.Fn disk_attach "struct disk *"
55.Ft void
56.Fn disk_detach "struct disk *"
57.Ft void
58.Fn disk_destroy "struct disk *"
59.Ft void
60.Fn disk_busy "struct disk *"
61.Ft void
62.Fn disk_unbusy "struct disk *" "long bcount" "int read"
63.Ft struct disk *
64.Fn disk_find "const char *"
65.Ft void
66.Fn disk_blocksize "struct disk *" "int blocksize"
67.Sh DESCRIPTION
68The
69.Nx
70generic disk framework is designed to provide flexible,
71scalable, and consistent handling of disk state and metrics information.
72The fundamental component of this framework is the
73.Nm disk
74structure, which is defined as follows:
75.Bd -literal
76struct disk {
77	TAILQ_ENTRY(disk) dk_link;	/* link in global disklist */
78	const char	*dk_name;	/* disk name */
79	prop_dictionary_t dk_info;	/* reference to disk-info dictionary */
80	int		dk_bopenmask;	/* block devices open */
81	int		dk_copenmask;	/* character devices open */
82	int		dk_openmask;	/* composite (bopen|copen) */
83	int		dk_state;	/* label state   ### */
84	int		dk_blkshift;	/* shift to convert DEV_BSIZE to blks */
85	int		dk_byteshift;	/* shift to convert bytes to blks */
86
87	/*
88	 * Metrics data; note that some metrics may have no meaning
89	 * on certain types of disks.
90	 */
91	struct io_stats	*dk_stats;
92
93	const struct dkdriver *dk_driver;	/* pointer to driver */
94
95	/*
96	 * Information required to be the parent of a disk wedge.
97	 */
98	kmutex_t	dk_rawlock;	/* lock on these fields */
99	u_int		dk_rawopens;	/* # of opens of rawvp */
100	struct vnode	*dk_rawvp;	/* vnode for the RAW_PART bdev */
101
102	kmutex_t	dk_openlock;	/* lock on these and openmask */
103	u_int		dk_nwedges;	/* # of configured wedges */
104					/* all wedges on this disk */
105	LIST_HEAD(, dkwedge_softc) dk_wedges;
106
107	/*
108	 * Disk label information.  Storage for the in-core disk label
109	 * must be dynamically allocated, otherwise the size of this
110	 * structure becomes machine-dependent.
111	 */
112	daddr_t		dk_labelsector;		/* sector containing label */
113	struct disklabel *dk_label;	/* label */
114	struct cpu_disklabel *dk_cpulabel;
115};
116.Ed
117.Pp
118The system maintains a global linked-list of all disks attached to the
119system.
120This list, called
121.Nm disklist ,
122may grow or shrink over time as disks are dynamically added and removed
123from the system.
124Drivers which currently make use of the detachment
125capability of the framework are the
126.Nm ccd
127and
128.Nm vnd
129pseudo-device drivers.
130.Pp
131The following is a brief description of each function in the framework:
132.Bl -tag -width ".Fn disk_blocksize"
133.It Fn disk_init
134Initialize the disk structure.
135.It Fn disk_attach
136Attach a disk; allocate storage for the disklabel, set the
137.Dq attached time
138timestamp, insert the disk into the disklist, and increment the
139system disk count.
140.It Fn disk_detach
141Detach a disk; free storage for the disklabel, remove the disk
142from the disklist, and decrement the system disk count.
143If the count drops below zero, panic.
144.It Fn disk_destroy
145Release resources used by the disk structure when it is no longer
146required.
147.It Fn disk_busy
148Increment the disk's
149.Dq busy counter .
150If this counter goes from 0 to 1, set the timestamp corresponding to
151this transfer.
152.It Fn disk_unbusy
153Decrement a disk's busy counter.
154If the count drops below zero, panic.
155Get the current time, subtract the disk's timestamp from it, and add
156the difference to the disk's total running time.
157Set the disk's timestamp to the current time.
158If the provided byte count is greater than 0, add it to the disk's
159running total and increment the number of transfers performed by the disk.
160The third argument
161.Ar read
162specifies the direction of I/O;
163if non-zero it means reading from the disk,
164otherwise it means writing to the disk.
165.It Fn disk_find
166Return a pointer to the disk structure corresponding to the name provided,
167or NULL if the disk does not exist.
168.It Fn disk_blocksize
169Initialize
170.Fa dk_blkshift
171and
172.Fa dk_byteshift
173members of
174.Fa struct disk
175with suitable values derived from the supplied physical blocksize.
176It is only necessary to call this function if the device's physical blocksize
177is not
178.Dv DEV_BSIZE .
179.El
180.Pp
181The functions typically called by device drivers are
182.Fn disk_init ,
183.Fn disk_attach ,
184.Fn disk_detach ,
185.Fn disk_destroy ,
186.Fn disk_busy ,
187.Fn disk_unbusy ,
188and
189.Fn disk_blocksize .
190The function
191.Fn disk_find
192is provided as a utility function.
193.Sh USING THE FRAMEWORK
194This section includes a description of basic use of the framework
195and example usage of its functions.
196Actual implementation of a device driver which uses the framework
197may vary.
198.Pp
199Each device in the system uses a
200.Dq softc
201structure which contains autoconfiguration and state information for that
202device.
203In the case of disks, the softc should also contain one instance
204of the disk structure, e.g.:
205.Bd -literal
206struct foo_softc {
207	device_t	sc_dev;		/* generic device information */
208	struct	disk	sc_dk;		/* generic disk information */
209	[ . . . more . . . ]
210};
211.Ed
212.Pp
213In order for the system to gather metrics data about a disk, the disk must
214be registered with the system.
215The
216.Fn disk_attach
217routine performs all of the functions currently required to register a disk
218with the system including allocation of disklabel storage space,
219recording of the time since boot that the disk was attached, and insertion
220into the disklist.
221Note that since this function allocates storage space for the disklabel,
222it must be called before the disklabel is read from the media or used in
223any other way.
224Before
225.Fn disk_attach
226is called, portions of the disk structure must be initialized with
227data specific to that disk.
228For example, in the
229.Dq foo
230disk driver, the following would be performed in the autoconfiguration
231.Dq attach
232routine:
233.Bd -literal
234void
235fooattach(device_t parent, device_t self, void *aux)
236{
237	struct foo_softc *sc = device_private(self);
238	[ . . . ]
239
240	/* Initialize and attach the disk structure. */
241	disk_init(\*[Am]sc-\*[Gt]sc_dk, device_xname(self), \*[Am]foodkdriver);
242	disk_attach(\*[Am]sc-\*[Gt]sc_dk);
243
244	/* Read geometry and fill in pertinent parts of disklabel. */
245	[ . . . ]
246	disk_blocksize(\*[Am]sc-\*[Gt]sc_dk, bytes_per_sector);
247}
248.Ed
249.Pp
250The
251.Nm foodkdriver
252above is the disk's
253.Dq driver
254switch.
255This switch currently includes a pointer to the disk's
256.Dq strategy
257routine.
258This switch needs to have global scope and should be initialized as follows:
259.Bd -literal
260void foostrategy(struct buf *);
261
262const struct dkdriver foodkdriver = {
263	.d_strategy = foostrategy,
264};
265.Ed
266.Pp
267Once the disk is attached, metrics may be gathered on that disk.
268In order to gather metrics data, the driver must tell the framework when
269the disk starts and stops operations.
270This functionality is provided by the
271.Fn disk_busy
272and
273.Fn disk_unbusy
274routines.
275The
276.Fn disk_busy
277routine should be called immediately before a command to the disk is
278sent, e.g.:
279.Bd -literal
280void
281foostart(sc)
282	struct foo_softc *sc;
283{
284	[ . . . ]
285
286	/* Get buffer from drive's transfer queue. */
287	[ . . . ]
288
289	/* Build command to send to drive. */
290	[ . . . ]
291
292	/* Tell the disk framework we're going busy. */
293	disk_busy(\*[Am]sc-\*[Gt]sc_dk);
294
295	/* Send command to the drive. */
296	[ . . . ]
297}
298.Ed
299.Pp
300When
301.Fn disk_busy
302is called, a timestamp is taken if the disk's busy counter moves from
3030 to 1, indicating the disk has gone from an idle to non-idle state.
304Note that
305.Fn disk_busy
306must be called at
307.Fn splbio .
308At the end of a transaction, the
309.Fn disk_unbusy
310routine should be called.
311This routine performs some consistency checks,
312such as ensuring that the calls to
313.Fn disk_busy
314and
315.Fn disk_unbusy
316are balanced.
317This routine also performs the actual metrics calculation.
318A timestamp is taken, and the difference from the timestamp taken in
319.Fn disk_busy
320is added to the disk's total running time.
321The disk's timestamp is then updated in case there is more than one
322pending transfer on the disk.
323A byte count is also added to the disk's running total, and if greater than
324zero, the number of transfers the disk has performed is incremented.
325The third argument
326.Ar read
327specifies the direction of I/O;
328if non-zero it means reading from the disk,
329otherwise it means writing to the disk.
330.Bd -literal
331void
332foodone(xfer)
333	struct foo_xfer *xfer;
334{
335	struct foo_softc *sc = (struct foo_softc *)xfer-\*[Gt]xf_softc;
336	struct buf *bp = xfer-\*[Gt]xf_buf;
337	long nbytes;
338	[ . . . ]
339
340	/*
341	 * Get number of bytes transferred.  If there is no buf
342	 * associated with the xfer, we are being called at the
343	 * end of a non-I/O command.
344	 */
345	if (bp == NULL)
346		nbytes = 0;
347	else
348		nbytes = bp-\*[Gt]b_bcount - bp-\*[Gt]b_resid;
349
350	[ . . . ]
351
352	/* Notify the disk framework that we've completed the transfer. */
353	disk_unbusy(\*[Am]sc-\*[Gt]sc_dk, nbytes,
354	    bp != NULL ? bp-\*[Gt]b_flags \*[Am] B_READ : 0);
355
356	[ . . . ]
357}
358.Ed
359.Pp
360Like
361.Fn disk_busy ,
362.Fn disk_unbusy
363must be called at
364.Fn splbio .
365.Sh CODE REFERENCES
366This section describes places within the
367.Nx
368source tree where actual
369code implementing or using the disk framework can be found.
370All pathnames are relative to
371.Pa /usr/src .
372.Pp
373The disk framework itself is implemented within the file
374.Pa sys/kern/subr_disk.c .
375Data structures and function prototypes for the framework are located in
376.Pa sys/sys/disk.h .
377.Pp
378The
379.Nx
380machine-independent SCSI disk and CD-ROM drivers use the
381disk framework.
382They are located in
383.Pa sys/scsi/sd.c
384and
385.Pa sys/scsi/cd.c .
386.Pp
387The
388.Nx
389.Nm ccd
390and
391.Nm vnd
392drivers use the detachment capability of the framework.
393They are located in
394.Pa sys/dev/ccd.c
395and
396.Pa sys/dev/vnd.c .
397.Sh SEE ALSO
398.Xr ccd 4 ,
399.Xr vnd 4 ,
400.Xr spl 9
401.Sh HISTORY
402The
403.Nx
404generic disk framework appeared in
405.Nx 1.2 .
406.Sh AUTHORS
407The
408.Nx
409generic disk framework was architected and implemented by
410.An Jason R. Thorpe
411.Aq thorpej@NetBSD.org .
412