xref: /netbsd-src/share/man/man9/disk.9 (revision 946379e7b37692fc43f68eb0d1c10daa0a7f3b6c)
1.\"	$NetBSD: disk.9,v 1.40 2014/12/31 20:13:41 mlelstv Exp $
2.\"
3.\" Copyright (c) 1995, 1996 Jason R. Thorpe.
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. All advertising materials mentioning features or use of this software
15.\"    must display the following acknowledgement:
16.\"	This product includes software developed for the NetBSD Project
17.\"	by Jason R. Thorpe.
18.\" 4. The name of the author may not be used to endorse or promote products
19.\"    derived from this software without specific prior written permission.
20.\"
21.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26.\" BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27.\" LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28.\" AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31.\" SUCH DAMAGE.
32.\"
33.Dd December 29, 2014
34.Dt DISK 9
35.Os
36.Sh NAME
37.Nm disk ,
38.Nm disk_init ,
39.Nm disk_attach ,
40.Nm disk_begindetach ,
41.Nm disk_detach ,
42.Nm disk_destroy ,
43.Nm disk_busy ,
44.Nm disk_unbusy ,
45.Nm disk_isbusy ,
46.Nm disk_find ,
47.Nm disk_set_info
48.Nd generic disk framework
49.Sh SYNOPSIS
50.In sys/types.h
51.In sys/disklabel.h
52.In sys/disk.h
53.Ft void
54.Fn disk_init "struct disk *" "const char *name" "const struct dkdriver *driver"
55.Ft void
56.Fn disk_attach "struct disk *"
57.Ft int
58.Fn disk_begindetach "struct disk *" "int (*lastclose)(device_t)" "device_t self" "int flags"
59.Ft void
60.Fn disk_detach "struct disk *"
61.Ft void
62.Fn disk_destroy "struct disk *"
63.Ft void
64.Fn disk_busy "struct disk *"
65.Ft void
66.Fn disk_unbusy "struct disk *" "long bcount" "int read"
67.Ft bool
68.Fn disk_isbusy "struct disk *"
69.Ft struct disk *
70.Fn disk_find "const char *"
71.Ft void
72.Fn disk_set_info "device_t" "struct disk *" "const char *type"
73.Sh DESCRIPTION
74The
75.Nx
76generic disk framework is designed to provide flexible,
77scalable, and consistent handling of disk state and metrics information.
78The fundamental component of this framework is the
79.Nm disk
80structure, which is defined as follows:
81.Bd -literal
82struct disk {
83	TAILQ_ENTRY(disk) dk_link;	/* link in global disklist */
84	const char	*dk_name;	/* disk name */
85	prop_dictionary_t dk_info;	/* reference to disk-info dictionary */
86	int		dk_bopenmask;	/* block devices open */
87	int		dk_copenmask;	/* character devices open */
88	int		dk_openmask;	/* composite (bopen|copen) */
89	int		dk_state;	/* label state   ### */
90	int		dk_blkshift;	/* shift to convert DEV_BSIZE to blks */
91	int		dk_byteshift;	/* shift to convert bytes to blks */
92
93	/*
94	 * Metrics data; note that some metrics may have no meaning
95	 * on certain types of disks.
96	 */
97	struct io_stats	*dk_stats;
98
99	const struct dkdriver *dk_driver;	/* pointer to driver */
100
101	/*
102	 * Information required to be the parent of a disk wedge.
103	 */
104	kmutex_t	dk_rawlock;	/* lock on these fields */
105	u_int		dk_rawopens;	/* # of opens of rawvp */
106	struct vnode	*dk_rawvp;	/* vnode for the RAW_PART bdev */
107
108	kmutex_t	dk_openlock;	/* lock on these and openmask */
109	u_int		dk_nwedges;	/* # of configured wedges */
110					/* all wedges on this disk */
111	LIST_HEAD(, dkwedge_softc) dk_wedges;
112
113	/*
114	 * Disk label information.  Storage for the in-core disk label
115	 * must be dynamically allocated, otherwise the size of this
116	 * structure becomes machine-dependent.
117	 */
118	daddr_t		dk_labelsector;		/* sector containing label */
119	struct disklabel *dk_label;	/* label */
120	struct cpu_disklabel *dk_cpulabel;
121};
122.Ed
123.Pp
124The system maintains a global linked-list of all disks attached to the
125system.
126This list, called
127.Nm disklist ,
128may grow or shrink over time as disks are dynamically added and removed
129from the system.
130Drivers which currently make use of the detachment
131capability of the framework are the
132.Nm ccd ,
133.Nm dm ,
134and
135.Nm vnd
136pseudo-device drivers.
137.Pp
138The following is a brief description of each function in the framework:
139.Bl -tag -width ".Fn disk_set_info"
140.It Fn disk_init
141Initialize the disk structure.
142.It Fn disk_attach
143Attach a disk; allocate storage for the disklabel, set the
144.Dq attached time
145timestamp, insert the disk into the disklist, and increment the
146system disk count.
147.It Fn disk_begindetach
148Check whether the disk is open, and if not, return 0.
149If the disk is open, and
150.Dv DETACH_FORCE
151is not set in
152.Fa flags ,
153return
154.Dv EBUSY .
155Otherwise, call the provided
156.Fa lastclose
157routine
158.Po
159if not
160.Dv NULL
161.Pc
162and return its exit code.
163.It Fn disk_detach
164Detach a disk; free storage for the disklabel, remove the disk
165from the disklist, and decrement the system disk count.
166If the count drops below zero, panic.
167.It Fn disk_destroy
168Release resources used by the disk structure when it is no longer
169required.
170.It Fn disk_busy
171Increment the disk's
172.Dq busy counter .
173If this counter goes from 0 to 1, set the timestamp corresponding to
174this transfer.
175.It Fn disk_unbusy
176Decrement a disk's busy counter.
177If the count drops below zero, panic.
178Get the current time, subtract the disk's timestamp from it, and add
179the difference to the disk's total running time.
180Set the disk's timestamp to the current time.
181If the provided byte count is greater than 0, add it to the disk's
182running total and increment the number of transfers performed by the disk.
183The third argument
184.Ar read
185specifies the direction of I/O;
186if non-zero it means reading from the disk,
187otherwise it means writing to the disk.
188.It Fn disk_isbusy
189Returns
190.Ar true
191if the disk is marked as busy and false if it is not.
192.It Fn disk_find
193Return a pointer to the disk structure corresponding to the name provided,
194or
195.Dv NULL
196if the disk does not exist.
197.It Fn disk_set_info
198Set up the disk-info dictionary and other dependent values of the disk structure.
199The driver must have initialized the dk_geom member of
200.Fa struct disk
201with suitable values.
202If
203.Fa type
204is not
205.Dv NULL ,
206it will be added to the dictionary.
207.El
208.Pp
209The functions typically called by device drivers are
210.Fn disk_init ,
211.Fn disk_attach ,
212.Fn disk_begindetach ,
213.Fn disk_detach ,
214.Fn disk_destroy ,
215.Fn disk_busy ,
216.Fn disk_unbusy ,
217and
218.Fn disk_set_info .
219The function
220.Fn disk_find
221is provided as a utility function.
222.Sh DISK IOCTLS
223The following ioctls should be implemented by disk drivers:
224.Bl -tag -width "xxxxxx"
225.It Dv DIOCGDINFO "struct disklabel"
226Get disklabel.
227.It Dv DIOCSDINFO "struct disklabel"
228Set in-memory disklabel.
229.It Dv DIOCWDINFO "struct disklabel"
230Set in-memory disklabel and write on-disk disklabel.
231.It Dv DIOCGPART "struct partinfo"
232Get partition information.
233This is used internally.
234.It Dv DIOCRFORMAT "struct format_op"
235Read format.
236.It Dv DIOCWFORMAT "struct format_op"
237Write format.
238.It Dv DIOCSSTEP "int"
239Set step rate.
240.It Dv DIOCSRETRIES "int"
241Set number of retries.
242.It Dv DIOCKLABEL "int"
243Specify whether to keep or drop the in-memory disklabel
244when the device is closed.
245.It Dv DIOCWLABEL "int"
246Enable or disable writing to the part of the disk that contains the label.
247.It Dv DIOCSBAD "struct dkbad"
248Set kernel dkbad.
249.It Dv DIOCEJECT "int"
250Eject removable disk.
251.It Dv DIOCLOCK "int"
252Lock or unlock disk pack.
253For devices with removable media, locking is intended to prevent
254the operator from removing the media.
255.It Dv DIOCGDEFLABEL "struct disklabel"
256Get default label.
257.It Dv DIOCCLRLABEL
258Clear disk label.
259.It Dv DIOCGCACHE "int"
260Get status of disk read and write caches.
261The result is a bitmask containing the following values:
262.Bl -tag -width DKCACHE_RCHANGE
263.It Dv DKCACHE_READ
264Read cache enabled.
265.It Dv DKCACHE_WRITE
266Write(back) cache enabled.
267.It Dv DKCACHE_RCHANGE
268Read cache enable is changeable.
269.It Dv DKCACHE_WCHANGE
270Write cache enable is changeable.
271.It Dv DKCACHE_SAVE
272Cache parameters may be saved, so that they persist across reboots
273or device detach/attach cycles.
274.El
275.It Dv DIOCSCACHE "int"
276Set status of disk read and write caches.
277The input is a bitmask in the same format as used for
278.Dv DIOCGCACHE .
279.It Dv DIOCCACHESYNC "int"
280Synchronise the disk cache.
281This causes information in the disk's write cache (if any)
282to be flushed to stable storage.
283The argument specifies whether or not to force a flush even if
284the kernel believes that there is no outstanding data.
285.It Dv DIOCBSLIST "struct disk_badsecinfo"
286Get bad sector list.
287.It Dv DIOCBSFLUSH
288Flush bad sector list.
289.It Dv DIOCAWEDGE "struct dkwedge_info"
290Add wedge.
291.It Dv DIOCGWEDGEINFO "struct dkwedge_info"
292Get wedge information.
293.It Dv DIOCDWEDGE "struct dkwedge_info"
294Delete wedge.
295.It Dv DIOCLWEDGES "struct dkwedge_list"
296List wedges.
297.It Dv DIOCGSTRATEGY "struct disk_strategy"
298Get disk buffer queue strategy.
299.It Dv DIOCSSTRATEGY "struct disk_strategy"
300Set disk buffer queue strategy.
301.It Dv DIOCGDISKINFO "struct plistref"
302Get disk-info dictionary.
303.It Dv DIOCGMEDIASIZE "off_t"
304Get disk size in bytes.
305.It Dv DIOCGSECTORSIZE "u_int"
306Get sector size in bytes.
307.El
308.Sh USING THE FRAMEWORK
309This section includes a description of the basic use of the framework
310and example usage of its functions.
311Actual implementation of a device driver which uses the framework
312may vary.
313.Pp
314Each device in the system uses a
315.Dq softc
316structure which contains autoconfiguration and state information for that
317device.
318In the case of disks, the softc should also contain one instance
319of the disk structure, e.g.:
320.Bd -literal
321struct foo_softc {
322	device_t	sc_dev;		/* generic device information */
323	struct	disk	sc_dk;		/* generic disk information */
324	[ . . . more . . . ]
325};
326.Ed
327.Pp
328In order for the system to gather metrics data about a disk, the disk must
329be registered with the system.
330The
331.Fn disk_attach
332routine performs all of the functions currently required to register a disk
333with the system including allocation of disklabel storage space,
334recording of the time since boot that the disk was attached, and insertion
335into the disklist.
336Note that since this function allocates storage space for the disklabel,
337it must be called before the disklabel is read from the media or used in
338any other way.
339Before
340.Fn disk_attach
341is called, portions of the disk structure must be initialized with
342data specific to that disk.
343For example, in the
344.Dq foo
345disk driver, the following would be performed in the autoconfiguration
346.Dq attach
347routine:
348.Bd -literal
349void
350fooattach(device_t parent, device_t self, void *aux)
351{
352	struct foo_softc *sc = device_private(self);
353	[ . . . ]
354
355	/* Initialize and attach the disk structure. */
356	disk_init(\*[Am]sc-\*[Gt]sc_dk, device_xname(self), \*[Am]foodkdriver);
357	disk_attach(\*[Am]sc-\*[Gt]sc_dk);
358
359	/* Read geometry and fill in pertinent parts of disklabel. */
360	/* Initialize geometry values of the disk structure */
361	[ . . . ]
362	disk_set_info(self, \*[Am]sc-\*[Gt]sc_dk, type);
363}
364.Ed
365.Pp
366The
367.Nm foodkdriver
368above is the disk's
369.Dq driver
370switch.
371This switch currently includes a pointer to the disk's
372.Dq strategy
373routine.
374This switch needs to have global scope and should be initialized as follows:
375.Bd -literal
376void foostrategy(struct buf *);
377
378const struct dkdriver foodkdriver = {
379	.d_strategy = foostrategy,
380};
381.Ed
382.Pp
383Once the disk is attached, metrics may be gathered on that disk.
384In order to gather metrics data, the driver must tell the framework when
385the disk starts and stops operations.
386This functionality is provided by the
387.Fn disk_busy
388and
389.Fn disk_unbusy
390routines.
391Because
392.Nm struct disk
393is part of device driver private data it needs to be guarded.
394Mutual exclusion must be provided by the driver, because
395.Fn disk_busy
396and
397.Fn disk_unbusy
398are not thread safe.
399The
400.Fn disk_busy
401routine should be called immediately before a command to the disk is
402sent, e.g.:
403.Bd -literal
404void
405foostart(sc)
406	struct foo_softc *sc;
407{
408	[ . . . ]
409
410	/* Get buffer from drive's transfer queue. */
411	[ . . . ]
412
413	/* Build command to send to drive. */
414	[ . . . ]
415
416	/* Tell the disk framework we're going busy. */
417	mutex_enter(\*[Am]sc-\*[Gt]sc_dk_mtx);
418	disk_busy(\*[Am]sc-\*[Gt]sc_dk);
419	mutex_exit(\*[Am]sc-\*[Gt]sc_dk_mtx);
420
421	/* Send command to the drive. */
422	[ . . . ]
423}
424.Ed
425.Pp
426When
427.Fn disk_busy
428is called, a timestamp is taken if the disk's busy counter moves from
4290 to 1, indicating the disk has gone from an idle to non-idle state.
430At the end of a transaction, the
431.Fn disk_unbusy
432routine should be called.
433This routine performs some consistency checks,
434such as ensuring that the calls to
435.Fn disk_busy
436and
437.Fn disk_unbusy
438are balanced.
439This routine also performs the actual metrics calculation.
440A timestamp is taken and the difference from the timestamp taken in
441.Fn disk_busy
442is added to the disk's total running time.
443The disk's timestamp is then updated in case there is more than one
444pending transfer on the disk.
445A byte count is also added to the disk's running total, and if greater than
446zero, the number of transfers the disk has performed is incremented.
447The third argument
448.Ar read
449specifies the direction of I/O;
450if non-zero it means reading from the disk,
451otherwise it means writing to the disk.
452.Bd -literal
453void
454foodone(xfer)
455	struct foo_xfer *xfer;
456{
457	struct foo_softc *sc = (struct foo_softc *)xfer-\*[Gt]xf_softc;
458	struct buf *bp = xfer-\*[Gt]xf_buf;
459	long nbytes;
460	[ . . . ]
461
462	/*
463	 * Get number of bytes transferred.  If there is no buf
464	 * associated with the xfer, we are being called at the
465	 * end of a non-I/O command.
466	 */
467	if (bp == NULL)
468		nbytes = 0;
469	else
470		nbytes = bp-\*[Gt]b_bcount - bp-\*[Gt]b_resid;
471
472	[ . . . ]
473
474	mutex_enter(\*[Am]sc-\*[Gt]sc_dk_mtx);
475	/* Notify the disk framework that we've completed the transfer. */
476	disk_unbusy(\*[Am]sc-\*[Gt]sc_dk, nbytes,
477	    bp != NULL ? bp-\*[Gt]b_flags \*[Am] B_READ : 0);
478	mutex_exit(\*[Am]sc-\*[Gt]sc_dk_mtx);
479
480	[ . . . ]
481}
482.Ed
483.Pp
484.Fn disk_isbusy
485is used to get the status of a disk device; it returns true if the device is
486currently busy and false if it is not.
487Like
488.Fn disk_busy
489and
490.Fn disk_unbusy
491it requires explicit locking by the caller.
492.Sh CODE REFERENCES
493The disk framework itself is implemented within the file
494.Pa sys/kern/subr_disk.c .
495Data structures and function prototypes for the framework are located in
496.Pa sys/sys/disk.h .
497.Pp
498The
499.Nx
500machine-independent SCSI disk and CD-ROM drivers use the
501disk framework.
502They are located in
503.Pa sys/scsi/sd.c
504and
505.Pa sys/scsi/cd.c .
506.Pp
507The
508.Nx
509.Nm ccd ,
510.Nm dm ,
511and
512.Nm vnd
513drivers use the detachment capability of the framework.
514They are located in
515.Pa sys/dev/ccd.c ,
516.Pa sys/dev/vnd.c ,
517and
518.Pa sys/dev/dm/device-mapper.c .
519.Sh SEE ALSO
520.Xr ccd 4 ,
521.Xr dm 4 ,
522.Xr vnd 4
523.Sh HISTORY
524The
525.Nx
526generic disk framework appeared in
527.Nx 1.2 .
528.Sh AUTHORS
529The
530.Nx
531generic disk framework was architected and implemented by
532.An Jason R. Thorpe
533.Aq thorpej@NetBSD.org .
534