xref: /netbsd-src/share/man/man9/disk.9 (revision da9817918ec7e88db2912a2882967c7570a83f47)
1.\"	$NetBSD: disk.9,v 1.32 2009/05/20 06:38:39 wiz Exp $
2.\"
3.\" Copyright (c) 1995, 1996 Jason R. Thorpe.
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. All advertising materials mentioning features or use of this software
15.\"    must display the following acknowledgement:
16.\"	This product includes software developed for the NetBSD Project
17.\"	by Jason R. Thorpe.
18.\" 4. The name of the author may not be used to endorse or promote products
19.\"    derived from this software without specific prior written permission.
20.\"
21.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE.
32.\"
33.Dd May 19, 2009
34.Dt DISK 9
35.Os
36.Sh NAME
37.Nm disk ,
38.Nm disk_init ,
39.Nm disk_attach ,
40.Nm disk_begindetach ,
41.Nm disk_detach ,
42.Nm disk_destroy ,
43.Nm disk_busy ,
44.Nm disk_unbusy ,
45.Nm disk_find ,
46.Nm disk_blocksize
47.Nd generic disk framework
48.Sh SYNOPSIS
49.In sys/types.h
50.In sys/disklabel.h
51.In sys/disk.h
52.Ft void
53.Fn disk_init "struct disk *" "const char *name" "const struct dkdriver *driver"
54.Ft void
55.Fn disk_attach "struct disk *"
56.Ft int
57.Fn disk_begindetach "struct disk *" "int (*lastclose)(device_t)" "device_t self" "int flags"
58.Ft void
59.Fn disk_detach "struct disk *"
60.Ft void
61.Fn disk_destroy "struct disk *"
62.Ft void
63.Fn disk_busy "struct disk *"
64.Ft void
65.Fn disk_unbusy "struct disk *" "long bcount" "int read"
66.Ft struct disk *
67.Fn disk_find "const char *"
68.Ft void
69.Fn disk_blocksize "struct disk *" "int blocksize"
70.Sh DESCRIPTION
71The
72.Nx
73generic disk framework is designed to provide flexible,
74scalable, and consistent handling of disk state and metrics information.
75The fundamental component of this framework is the
76.Nm disk
77structure, which is defined as follows:
78.Bd -literal
79struct disk {
80	TAILQ_ENTRY(disk) dk_link;	/* link in global disklist */
81	const char	*dk_name;	/* disk name */
82	prop_dictionary_t dk_info;	/* reference to disk-info dictionary */
83	int		dk_bopenmask;	/* block devices open */
84	int		dk_copenmask;	/* character devices open */
85	int		dk_openmask;	/* composite (bopen|copen) */
86	int		dk_state;	/* label state   ### */
87	int		dk_blkshift;	/* shift to convert DEV_BSIZE to blks */
88	int		dk_byteshift;	/* shift to convert bytes to blks */
89
90	/*
91	 * Metrics data; note that some metrics may have no meaning
92	 * on certain types of disks.
93	 */
94	struct io_stats	*dk_stats;
95
96	const struct dkdriver *dk_driver;	/* pointer to driver */
97
98	/*
99	 * Information required to be the parent of a disk wedge.
100	 */
101	kmutex_t	dk_rawlock;	/* lock on these fields */
102	u_int		dk_rawopens;	/* # of opens of rawvp */
103	struct vnode	*dk_rawvp;	/* vnode for the RAW_PART bdev */
104
105	kmutex_t	dk_openlock;	/* lock on these and openmask */
106	u_int		dk_nwedges;	/* # of configured wedges */
107					/* all wedges on this disk */
108	LIST_HEAD(, dkwedge_softc) dk_wedges;
109
110	/*
111	 * Disk label information.  Storage for the in-core disk label
112	 * must be dynamically allocated, otherwise the size of this
113	 * structure becomes machine-dependent.
114	 */
115	daddr_t		dk_labelsector;		/* sector containing label */
116	struct disklabel *dk_label;	/* label */
117	struct cpu_disklabel *dk_cpulabel;
118};
119.Ed
120.Pp
121The system maintains a global linked-list of all disks attached to the
122system.
123This list, called
124.Nm disklist ,
125may grow or shrink over time as disks are dynamically added and removed
126from the system.
127Drivers which currently make use of the detachment
128capability of the framework are the
129.Nm ccd
130and
131.Nm vnd
132pseudo-device drivers.
133.Pp
134The following is a brief description of each function in the framework:
135.Bl -tag -width ".Fn disk_blocksize"
136.It Fn disk_init
137Initialize the disk structure.
138.It Fn disk_attach
139Attach a disk; allocate storage for the disklabel, set the
140.Dq attached time
141timestamp, insert the disk into the disklist, and increment the
142system disk count.
143.It Fn disk_begindetach
144Check whether the disk is open, and if not, return 0.
145If the disk is open, and
146.Dv DETACH_FORCE
147is not set in
148.Fa flags ,
149return
150.Dv EBUSY .
151Otherwise, call the provided
152.Fa lastclose
153routine
154.Po
155if not
156.Dv NULL
157.Pc
158and return its exit code.
159.It Fn disk_detach
160Detach a disk; free storage for the disklabel, remove the disk
161from the disklist, and decrement the system disk count.
162If the count drops below zero, panic.
163.It Fn disk_destroy
164Release resources used by the disk structure when it is no longer
165required.
166.It Fn disk_busy
167Increment the disk's
168.Dq busy counter .
169If this counter goes from 0 to 1, set the timestamp corresponding to
170this transfer.
171.It Fn disk_unbusy
172Decrement a disk's busy counter.
173If the count drops below zero, panic.
174Get the current time, subtract the disk's timestamp from it, and add
175the difference to the disk's running total.
176Set the disk's timestamp to the current time.
177If the provided byte count is greater than 0, add it to the disk's
178running total and increment the number of transfers performed by the disk.
179The third argument
180.Fa read
181specifies the direction of I/O;
182if non-zero it means reading from the disk,
183otherwise it means writing to the disk.
184.It Fn disk_find
185Return a pointer to the disk structure corresponding to the name provided,
186or NULL if the disk does not exist.
187.It Fn disk_blocksize
188Initialize
189.Fa dk_blkshift
190and
191.Fa dk_byteshift
192members of
193.Fa struct disk
194with suitable values derived from the supplied physical blocksize.
195It is only necessary to call this function if the device's physical blocksize
196is not
197.Dv DEV_BSIZE .
198.El
199.Pp
200The functions typically called by device drivers are
201.Fn disk_init ,
202.Fn disk_attach ,
203.Fn disk_begindetach ,
204.Fn disk_detach ,
205.Fn disk_destroy ,
206.Fn disk_busy ,
207.Fn disk_unbusy ,
208and
209.Fn disk_blocksize .
210The function
211.Fn disk_find
212is provided as a utility function.
213.Sh DISK IOCTLS
214The following ioctls should be implemented by disk drivers:
215.Bl -tag -width "xxxxxx"
216.It Dv DIOCGDINFO "struct disklabel"
217Get disklabel.
218.It Dv DIOCSDINFO "struct disklabel"
219Set in-memory disklabel.
220.It Dv DIOCWDINFO "struct disklabel"
221Set in-memory disklabel, and write on-disk disklabel.
222.It Dv DIOCGPART "struct partinfo"
223Get partition information.
224This is used internally.
225.It Dv DIOCRFORMAT "struct format_op"
226Read format.
227.It Dv DIOCWFORMAT "struct format_op"
228Write format.
229.It Dv DIOCSSTEP "int"
230Set step rate.
231.It Dv DIOCSRETRIES "int"
232Set number of retries.
233.It Dv DIOCKLABEL "int"
234Specify whether to keep or drop the in-memory disklabel
235when the device is closed.
236.It Dv DIOCWLABEL "int"
237Enable or disable writing to the part of the disk that contains the label.
238.It Dv DIOCSBAD "struct dkbad"
239Set kernel dkbad.
240.It Dv DIOCEJECT "int"
241Eject removable disk.
242.It Dv DIOCLOCK "int"
243Lock or unlock disk pack.
244For devices with removable media, locking is intended to prevent
245the operator from removing the media.
246.It Dv DIOCGDEFLABEL "struct disklabel"
247Get default label.
248.It Dv DIOCCLRLABEL
249Clear disk label.
250.It Dv DIOCGCACHE "int"
251Get status of disk read and write caches.
252The result is a bitmask containing the following values:
253.Bl -tag -width DKCACHE_RCHANGE
254.It Dv DKCACHE_READ
255Read cache enabled.
256.It Dv DKCACHE_WRITE
257Write(back) cache enabled.
258.It Dv DKCACHE_RCHANGE
259Read cache enable is changeable.
260.It Dv DKCACHE_WCHANGE
261Write cache enable is changeable.
262.It Dv DKCACHE_SAVE
263Cache parameters may be saved, so that they persist across reboots
264or device detach/attach cycles.
265.El
266.It Dv DIOCSCACHE "int"
267Set status of disk read and write caches.
268The input is a bitmask in the same format as used for
269.Dv DIOCGCACHE .
270.It Dv DIOCCACHESYNC "int"
271Synchronise the disk cache.
272This causes information in the disk's write cache (if any)
273to be flushed to stable storage.
274The argument specifies whether or not to force a flush even if
275the kernel believes that there is no outstanding data.
276.It Dv DIOCBSLIST "struct disk_badsecinfo"
277Get bad sector list.
278.It Dv DIOCBSFLUSH
279Flush bad sector list.
280.It Dv DIOCAWEDGE "struct dkwedge_info"
281Add wedge.
282.It Dv DIOCGWEDGEINFO "struct dkwedge_info"
283Get wedge information.
284.It Dv DIOCDWEDGE "struct dkwedge_info"
285Delete wedge.
286.It Dv DIOCLWEDGES "struct dkwedge_list"
287List wedges.
288.It Dv DIOCGSTRATEGY "struct disk_strategy"
289Get disk buffer queue strategy.
290.It Dv DIOCSSTRATEGY "struct disk_strategy"
291Set disk buffer queue strategy.
292.It Dv DIOCGDISKINFO "struct plistref"
293Get disk-info dictionary.
294.El
295.Sh USING THE FRAMEWORK
296This section includes a description of basic use of the framework
297and example usage of its functions.
298Actual implementation of a device driver which uses the framework
299may vary.
300.Pp
301Each device in the system uses a
302.Dq softc
303structure which contains autoconfiguration and state information for that
304device.
305In the case of disks, the softc should also contain one instance
306of the disk structure, e.g.:
307.Bd -literal
308struct foo_softc {
309	device_t	sc_dev;		/* generic device information */
310	struct	disk	sc_dk;		/* generic disk information */
311	[ . . . more . . . ]
312};
313.Ed
314.Pp
315In order for the system to gather metrics data about a disk, the disk must
316be registered with the system.
317The
318.Fn disk_attach
319routine performs all of the functions currently required to register a disk
320with the system including allocation of disklabel storage space,
321recording of the time since boot that the disk was attached, and insertion
322into the disklist.
323Note that since this function allocates storage space for the disklabel,
324it must be called before the disklabel is read from the media or used in
325any other way.
326Before
327.Fn disk_attach
328is called, portions of the disk structure must be initialized with
329data specific to that disk.
330For example, in the
331.Dq foo
332disk driver, the following would be performed in the autoconfiguration
333.Dq attach
334routine:
335.Bd -literal
336void
337fooattach(device_t parent, device_t self, void *aux)
338{
339	struct foo_softc *sc = device_private(self);
340	[ . . . ]
341
342	/* Initialize and attach the disk structure. */
343	disk_init(\*[Am]sc-\*[Gt]sc_dk, device_xname(self), \*[Am]foodkdriver);
344	disk_attach(\*[Am]sc-\*[Gt]sc_dk);
345
346	/* Read geometry and fill in pertinent parts of disklabel. */
347	[ . . . ]
348	disk_blocksize(\*[Am]sc-\*[Gt]sc_dk, bytes_per_sector);
349}
350.Ed
351.Pp
352The
353.Nm foodkdriver
354above is the disk's
355.Dq driver
356switch.
357This switch currently includes a pointer to the disk's
358.Dq strategy
359routine.
360This switch needs to have global scope and should be initialized as follows:
361.Bd -literal
362void foostrategy(struct buf *);
363
364const struct dkdriver foodkdriver = {
365	.d_strategy = foostrategy,
366};
367.Ed
368.Pp
369Once the disk is attached, metrics may be gathered on that disk.
370In order to gather metrics data, the driver must tell the framework when
371the disk starts and stops operations.
372This functionality is provided by the
373.Fn disk_busy
374and
375.Fn disk_unbusy
376routines.
377The
378.Fn disk_busy
379routine should be called immediately before a command to the disk is
380sent, e.g.:
381.Bd -literal
382void
383foostart(sc)
384	struct foo_softc *sc;
385{
386	[ . . . ]
387
388	/* Get buffer from drive's transfer queue. */
389	[ . . . ]
390
391	/* Build command to send to drive. */
392	[ . . . ]
393
394	/* Tell the disk framework we're going busy. */
395	disk_busy(\*[Am]sc-\*[Gt]sc_dk);
396
397	/* Send command to the drive. */
398	[ . . . ]
399}
400.Ed
401.Pp
402When
403.Fn disk_busy
404is called, a timestamp is taken if the disk's busy counter moves from
4050 to 1, indicating the disk has gone from an idle to non-idle state.
406Note that
407.Fn disk_busy
408must be called at
409.Fn splbio .
410At the end of a transaction, the
411.Fn disk_unbusy
412routine should be called.
413This routine performs some consistency checks,
414such as ensuring that the calls to
415.Fn disk_busy
416and
417.Fn disk_unbusy
418are balanced.
419This routine also performs the actual metrics calculation.
420A timestamp is taken, and the difference from the timestamp taken in
421.Fn disk_busy
422is added to the disk's total running time.
423The disk's timestamp is then updated in case there is more than one
424pending transfer on the disk.
425A byte count is also added to the disk's running total, and if greater than
426zero, the number of transfers the disk has performed is incremented.
427The third argument
428.Fa read
429specifies the direction of I/O;
430if non-zero it means reading from the disk,
431otherwise it means writing to the disk.
432.Bd -literal
433void
434foodone(xfer)
435	struct foo_xfer *xfer;
436{
437	struct foo_softc *sc = (struct foo_softc *)xfer-\*[Gt]xf_softc;
438	struct buf *bp = xfer-\*[Gt]xf_buf;
439	long nbytes;
440	[ . . . ]
441
442	/*
443	 * Get number of bytes transferred.  If there is no buf
444	 * associated with the xfer, we are being called at the
445	 * end of a non-I/O command.
446	 */
447	if (bp == NULL)
448		nbytes = 0;
449	else
450		nbytes = bp-\*[Gt]b_bcount - bp-\*[Gt]b_resid;
451
452	[ . . . ]
453
454	/* Notify the disk framework that we've completed the transfer. */
455	disk_unbusy(\*[Am]sc-\*[Gt]sc_dk, nbytes,
456	    bp != NULL ? bp-\*[Gt]b_flags \*[Am] B_READ : 0);
457
458	[ . . . ]
459}
460.Ed
461.Pp
462Like
463.Fn disk_busy ,
464.Fn disk_unbusy
465must be called at
466.Fn splbio .
467.Sh CODE REFERENCES
468This section describes places within the
469.Nx
470source tree where actual
471code implementing or using the disk framework can be found.
472All pathnames are relative to
473.Pa /usr/src .
474.Pp
475The disk framework itself is implemented within the file
476.Pa sys/kern/subr_disk.c .
477Data structures and function prototypes for the framework are located in
478.Pa sys/sys/disk.h .
479.Pp
480The
481.Nx
482machine-independent SCSI disk and CD-ROM drivers use the
483disk framework.
484They are located in
485.Pa sys/dev/scsipi/sd.c
486and
487.Pa sys/dev/scsipi/cd.c .
488.Pp
489The
490.Nx
491.Nm ccd
492and
493.Nm vnd
494drivers use the detachment capability of the framework.
495They are located in
496.Pa sys/dev/ccd.c
497and
498.Pa sys/dev/vnd.c .
499.Sh SEE ALSO
500.Xr ccd 4 ,
501.Xr vnd 4 ,
502.Xr spl 9
503.Sh HISTORY
504The
505.Nx
506generic disk framework appeared in
507.Nx 1.2 .
508.Sh AUTHORS
509The
510.Nx
511generic disk framework was architected and implemented by
512.An Jason R. Thorpe
513.Aq thorpej@NetBSD.org .
514