xref: /netbsd-src/sys/dev/ata/wd.c (revision fad4c9f71477ae11cea2ee75ec82151ac770a534)
1 /*	$NetBSD: wd.c,v 1.326 2006/06/24 04:49:40 gendalia Exp $ */
2 
3 /*
4  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *	notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *	notice, this list of conditions and the following disclaimer in the
13  *	documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *	must display the following acknowledgement:
16  *  This product includes software developed by Manuel Bouyer.
17  * 4. The name of the author may not be used to endorse or promote products
18  *	derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to The NetBSD Foundation
37  * by Charles M. Hannum and by Onno van der Linden.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *        This product includes software developed by the NetBSD
50  *        Foundation, Inc. and its contributors.
51  * 4. Neither the name of The NetBSD Foundation nor the names of its
52  *    contributors may be used to endorse or promote products derived
53  *    from this software without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
56  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
57  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
58  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
59  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
60  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
61  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
62  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
63  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
64  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
65  * POSSIBILITY OF SUCH DAMAGE.
66  */
67 
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.326 2006/06/24 04:49:40 gendalia Exp $");
70 
/*
 * Make sure ATADEBUG is defined for this file so that the
 * ATADEBUG-conditional code further down is compiled in.
 */
#if !defined(ATADEBUG)
#define ATADEBUG
#endif /* !ATADEBUG */
74 
75 #include "rnd.h"
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/kernel.h>
80 #include <sys/conf.h>
81 #include <sys/file.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/buf.h>
85 #include <sys/bufq.h>
86 #include <sys/uio.h>
87 #include <sys/malloc.h>
88 #include <sys/device.h>
89 #include <sys/disklabel.h>
90 #include <sys/disk.h>
91 #include <sys/syslog.h>
92 #include <sys/proc.h>
93 #include <sys/vnode.h>
94 #if NRND > 0
95 #include <sys/rnd.h>
96 #endif
97 
98 #include <machine/intr.h>
99 #include <machine/bus.h>
100 
101 #include <dev/ata/atareg.h>
102 #include <dev/ata/atavar.h>
103 #include <dev/ata/wdvar.h>
104 #include <dev/ic/wdcreg.h>
105 #include <sys/ataio.h>
106 #include "locators.h"
107 
/*
 * Block numbers above this value are issued with LBA48 commands when the
 * drive supports them (see __wdstart()).
 */
#define	LBA48_THRESHOLD		(0xfffffff)	/* 128GB / DEV_BSIZE */

#define	WDIORETRIES_SINGLE 4	/* number of retries before single-sector */
#define	WDIORETRIES	5	/* number of retries before giving up */
/* Parenthesized so the expansion is safe inside a larger expression. */
#define	RECOVERYTIME (hz / 2)	/* time to wait before retrying a cmd */

/* Thin aliases for the generic disk unit/partition/minor helpers. */
#define	WDUNIT(dev)		DISKUNIT(dev)
#define	WDPART(dev)		DISKPART(dev)
#define	WDMINOR(unit, part)	DISKMINOR(unit, part)
#define	MAKEWDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)

/* Device number of the raw partition on the same unit as `dev'. */
#define	WDLABELDEV(dev)	(MAKEWDDEV(major(dev), WDUNIT(dev), RAW_PART))

/* Bits tested against wdcdebug_wd_mask by ATADEBUG_PRINT(). */
#define DEBUG_INTR   0x01
#define DEBUG_XFERS  0x02
#define DEBUG_STATUS 0x04
#define DEBUG_FUNCS  0x08
#define DEBUG_PROBE  0x10
#ifdef ATADEBUG
int wdcdebug_wd_mask = 0x0;
/*
 * Print `args' (a parenthesized printf argument list) when any bit of
 * `level' is set in wdcdebug_wd_mask.  Wrapped in do/while so that the
 * macro behaves as a single statement, e.g. in an unbraced if/else.
 */
#define ATADEBUG_PRINT(args, level) \
	do { \
		if (wdcdebug_wd_mask & (level)) \
			printf args; \
	} while (/*CONSTCOND*/ 0)
#else
#define ATADEBUG_PRINT(args, level)
#endif
134 
/* Autoconfiguration entry points and error-reporting helpers. */
int	wdprobe(struct device *, struct cfdata *, void *);
void	wdattach(struct device *, struct device *, void *);
int	wddetach(struct device *, int);
int	wdactivate(struct device *, enum devact);
int	wdprint(void *, char *);
void	wdperror(const struct wd_softc *);

/*
 * Attachment declaration for "wd": the softc size followed by the
 * probe, attach, detach and activate routines.
 */
CFATTACH_DECL(wd, sizeof(struct wd_softc),
    wdprobe, wdattach, wddetach, wdactivate);

extern struct cfdriver wd_cd;

/* Prototypes for the device switch entry points used below. */
dev_type_open(wdopen);
dev_type_close(wdclose);
dev_type_read(wdread);
dev_type_write(wdwrite);
dev_type_ioctl(wdioctl);
dev_type_strategy(wdstrategy);
dev_type_dump(wddump);
dev_type_size(wdsize);

/* Block device switch for wd. */
const struct bdevsw wd_bdevsw = {
	wdopen, wdclose, wdstrategy, wdioctl, wddump, wdsize, D_DISK
};

/* Character device switch for wd. */
const struct cdevsw wd_cdevsw = {
	wdopen, wdclose, wdread, wdwrite, wdioctl,
	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
};
164 
/*
 * Glue necessary to hook WDCIOCCOMMAND into physio
 */

/*
 * One entry per request; entries are linked on wi_head through wi_list.
 * Each entry embeds its own buf, uio, iovec and ATA request and keeps a
 * pointer to the softc it belongs to.
 */
struct wd_ioctl {
	LIST_ENTRY(wd_ioctl) wi_list;
	struct buf wi_bp;
	struct uio wi_uio;
	struct iovec wi_iov;
	atareq_t wi_atareq;
	struct wd_softc *wi_softc;
};

/* Head of the list of wd_ioctl entries. */
LIST_HEAD(, wd_ioctl) wi_head;

/* Helpers operating on wd_ioctl entries. */
struct	wd_ioctl *wi_find(struct buf *);
void	wi_free(struct wd_ioctl *);
struct	wd_ioctl *wi_get(void);
void	wdioctlstrategy(struct buf *);

/* Disklabel handling, transfer start/completion and drive commands. */
void  wdgetdefaultlabel(struct wd_softc *, struct disklabel *);
void  wdgetdisklabel(struct wd_softc *);
void  wdstart(void *);
void  __wdstart(struct wd_softc*, struct buf *);
void  wdrestart(void *);
void  wddone(void *);
int   wd_get_params(struct wd_softc *, u_int8_t, struct ataparams *);
int   wd_standby(struct wd_softc *, int);
int   wd_flushcache(struct wd_softc *, int);
void  wd_shutdown(void *);

int   wd_getcache(struct wd_softc *, int *);
int   wd_setcache(struct wd_softc *, int);

/* Driver hooks handed to the disk framework via sc_dk.dk_driver. */
struct dkdriver wddkdriver = { wdstrategy, minphys };
200 
#ifdef HAS_BAD144_HANDLING
static void bad144intern(struct wd_softc *);
#endif

/* Quirk bits stored in wd_quirk.wdq_quirks and copied to sc_quirks. */
#define	WD_QUIRK_SPLIT_MOD15_WRITE	0x0001	/* must split certain writes */
#define	WD_QUIRK_FORCE_LBA48		0x0002	/* must use LBA48 commands */

/*
 * Quirk table for IDE drives.  Put more-specific matches first, since
 * a simple globbing routine is used for matching and the first matching
 * entry wins.  The table is terminated by an entry with a NULL pattern.
 */
static const struct wd_quirk {
	const char *wdq_match;		/* inquiry pattern to match */
	int wdq_quirks;			/* drive quirks */
} wd_quirk_table[] = {
	/*
	 * Some Seagate S-ATA drives have a PHY which can get confused
	 * with the way data is packetized by some S-ATA controllers.
	 *
	 * The work-around is to split in two any write transfer whose
	 * sector count % 15 == 1 (assuming 512 byte sectors).
	 *
	 * XXX This is an incomplete list.  There are at least a couple
	 * XXX more model numbers.  If you have trouble with such transfers
	 * XXX (8K is the most common) on Seagate S-ATA drives, please
	 * XXX notify thorpej@NetBSD.org.
	 */
	{ "ST3120023AS",
	  WD_QUIRK_SPLIT_MOD15_WRITE },
	{ "ST380023AS",
	  WD_QUIRK_SPLIT_MOD15_WRITE },

	/*
	 * These Seagate drives seem to have issues addressing sector
	 * 0xfffffff (aka LBA48_THRESHOLD) in LBA mode.  The workaround is
	 * to force LBA48.
	 * Note that we can't just change the code to always use LBA48 for
	 * sector 0xfffffff, because this would break valid and working
	 * setups using LBA48 drives on non-LBA48-capable controllers
	 * (and it's hard to get a list of such controllers).
	 */
	{ "ST3160021A*",
	  WD_QUIRK_FORCE_LBA48 },
	{ "ST3160812A*",
	  WD_QUIRK_FORCE_LBA48 },
	{ "ST3160023A*",
	  WD_QUIRK_FORCE_LBA48 },
	{ "ST3160827A*",
	  WD_QUIRK_FORCE_LBA48 },
	/* Attempt to catch all Seagate drives larger than 200GB */
	{ "ST3[2-9][0-9][0-9][0-9][0-9][0-9][A-Z]*",
	  WD_QUIRK_FORCE_LBA48 },
	{ NULL,
	  0 }
};
256 
257 static const struct wd_quirk *
258 wd_lookup_quirks(const char *name)
259 {
260 	const struct wd_quirk *wdq;
261 	const char *estr;
262 
263 	for (wdq = wd_quirk_table; wdq->wdq_match != NULL; wdq++) {
264 		/*
265 		 * We only want exact matches (which include matches
266 		 * against globbing characters).
267 		 */
268 		if (pmatch(name, wdq->wdq_match, &estr) == 2)
269 			return (wdq);
270 	}
271 	return (NULL);
272 }
273 
274 int
275 wdprobe(struct device *parent, struct cfdata *match, void *aux)
276 {
277 	struct ata_device *adev = aux;
278 
279 	if (adev == NULL)
280 		return 0;
281 	if (adev->adev_bustype->bustype_type != SCSIPI_BUSTYPE_ATA)
282 		return 0;
283 
284 	if (match->cf_loc[ATA_HLCF_DRIVE] != ATA_HLCF_DRIVE_DEFAULT &&
285 	    match->cf_loc[ATA_HLCF_DRIVE] != adev->adev_drv_data->drive)
286 		return 0;
287 	return 1;
288 }
289 
290 void
291 wdattach(struct device *parent, struct device *self, void *aux)
292 {
293 	struct wd_softc *wd = (void *)self;
294 	struct ata_device *adev= aux;
295 	int i, blank;
296 	char tbuf[41], pbuf[9], c, *p, *q;
297 	const struct wd_quirk *wdq;
298 	ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE);
299 
300 	callout_init(&wd->sc_restart_ch);
301 	bufq_alloc(&wd->sc_q, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
302 #ifdef WD_SOFTBADSECT
303 	SLIST_INIT(&wd->sc_bslist);
304 #endif
305 	wd->atabus = adev->adev_bustype;
306 	wd->openings = adev->adev_openings;
307 	wd->drvp = adev->adev_drv_data;
308 
309 	wd->drvp->drv_done = wddone;
310 	wd->drvp->drv_softc = &wd->sc_dev;
311 
312 	aprint_naive("\n");
313 
314 	/* read our drive info */
315 	if (wd_get_params(wd, AT_WAIT, &wd->sc_params) != 0) {
316 		aprint_error("\n%s: IDENTIFY failed\n", wd->sc_dev.dv_xname);
317 		return;
318 	}
319 
320 	for (blank = 0, p = wd->sc_params.atap_model, q = tbuf, i = 0;
321 	    i < sizeof(wd->sc_params.atap_model); i++) {
322 		c = *p++;
323 		if (c == '\0')
324 			break;
325 		if (c != ' ') {
326 			if (blank) {
327 				*q++ = ' ';
328 				blank = 0;
329 			}
330 			*q++ = c;
331 		} else
332 			blank = 1;
333 	}
334 	*q++ = '\0';
335 
336 	aprint_normal(": <%s>\n", tbuf);
337 
338 	wdq = wd_lookup_quirks(tbuf);
339 	if (wdq != NULL)
340 		wd->sc_quirks = wdq->wdq_quirks;
341 
342 	if ((wd->sc_params.atap_multi & 0xff) > 1) {
343 		wd->sc_multi = wd->sc_params.atap_multi & 0xff;
344 	} else {
345 		wd->sc_multi = 1;
346 	}
347 
348 	aprint_normal("%s: drive supports %d-sector PIO transfers,",
349 	    wd->sc_dev.dv_xname, wd->sc_multi);
350 
351 	/* 48-bit LBA addressing */
352 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0)
353 		wd->sc_flags |= WDF_LBA48;
354 
355 	/* Prior to ATA-4, LBA was optional. */
356 	if ((wd->sc_params.atap_capabilities1 & WDC_CAP_LBA) != 0)
357 		wd->sc_flags |= WDF_LBA;
358 #if 0
359 	/* ATA-4 requires LBA. */
360 	if (wd->sc_params.atap_ataversion != 0xffff &&
361 	    wd->sc_params.atap_ataversion >= WDC_VER_ATA4)
362 		wd->sc_flags |= WDF_LBA;
363 #endif
364 
365 	if ((wd->sc_flags & WDF_LBA48) != 0) {
366 		aprint_normal(" LBA48 addressing\n");
367 		wd->sc_capacity =
368 		    ((u_int64_t) wd->sc_params.__reserved6[11] << 48) |
369 		    ((u_int64_t) wd->sc_params.__reserved6[10] << 32) |
370 		    ((u_int64_t) wd->sc_params.__reserved6[9]  << 16) |
371 		    ((u_int64_t) wd->sc_params.__reserved6[8]  << 0);
372 	} else if ((wd->sc_flags & WDF_LBA) != 0) {
373 		aprint_normal(" LBA addressing\n");
374 		wd->sc_capacity =
375 		    ((u_int64_t)wd->sc_params.atap_capacity[1] << 16) |
376 		    wd->sc_params.atap_capacity[0];
377 	} else {
378 		aprint_normal(" chs addressing\n");
379 		wd->sc_capacity =
380 		    wd->sc_params.atap_cylinders *
381 		    wd->sc_params.atap_heads *
382 		    wd->sc_params.atap_sectors;
383 	}
384 	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * DEV_BSIZE);
385 	aprint_normal("%s: %s, %d cyl, %d head, %d sec, "
386 	    "%d bytes/sect x %llu sectors\n",
387 	    self->dv_xname, pbuf,
388 	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
389 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
390 		wd->sc_params.atap_cylinders,
391 	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
392 	    DEV_BSIZE, (unsigned long long)wd->sc_capacity);
393 
394 	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
395 	    self->dv_xname, wd->sc_params.atap_dmatiming_mimi,
396 	    wd->sc_params.atap_dmatiming_recom), DEBUG_PROBE);
397 	/*
398 	 * Initialize and attach the disk structure.
399 	 */
400 	wd->sc_dk.dk_driver = &wddkdriver;
401 	wd->sc_dk.dk_name = wd->sc_dev.dv_xname;
402 	disk_attach(&wd->sc_dk);
403 	wd->sc_wdc_bio.lp = wd->sc_dk.dk_label;
404 	wd->sc_sdhook = shutdownhook_establish(wd_shutdown, wd);
405 	if (wd->sc_sdhook == NULL)
406 		aprint_error("%s: WARNING: unable to establish shutdown hook\n",
407 		    wd->sc_dev.dv_xname);
408 #if NRND > 0
409 	rnd_attach_source(&wd->rnd_source, wd->sc_dev.dv_xname,
410 			  RND_TYPE_DISK, 0);
411 #endif
412 
413 	/* Discover wedges on this disk. */
414 	dkwedge_discover(&wd->sc_dk);
415 }
416 
417 int
418 wdactivate(struct device *self, enum devact act)
419 {
420 	int rv = 0;
421 
422 	switch (act) {
423 	case DVACT_ACTIVATE:
424 		rv = EOPNOTSUPP;
425 		break;
426 
427 	case DVACT_DEACTIVATE:
428 		/*
429 		 * Nothing to do; we key off the device's DVF_ACTIVATE.
430 		 */
431 		break;
432 	}
433 	return (rv);
434 }
435 
436 int
437 wddetach(struct device *self, int flags)
438 {
439 	struct wd_softc *sc = (struct wd_softc *)self;
440 	int s, bmaj, cmaj, i, mn;
441 
442 	/* locate the major number */
443 	bmaj = bdevsw_lookup_major(&wd_bdevsw);
444 	cmaj = cdevsw_lookup_major(&wd_cdevsw);
445 
446 	/* Nuke the vnodes for any open instances. */
447 	for (i = 0; i < MAXPARTITIONS; i++) {
448 		mn = WDMINOR(device_unit(self), i);
449 		vdevgone(bmaj, mn, mn, VBLK);
450 		vdevgone(cmaj, mn, mn, VCHR);
451 	}
452 
453 	/* Delete all of our wedges. */
454 	dkwedge_delall(&sc->sc_dk);
455 
456 	s = splbio();
457 
458 	/* Kill off any queued buffers. */
459 	bufq_drain(sc->sc_q);
460 
461 	bufq_free(sc->sc_q);
462 	sc->atabus->ata_killpending(sc->drvp);
463 
464 	splx(s);
465 
466 	/* Detach disk. */
467 	disk_detach(&sc->sc_dk);
468 
469 #ifdef WD_SOFTBADSECT
470 	/* Clean out the bad sector list */
471 	while (!SLIST_EMPTY(&sc->sc_bslist)) {
472 		void *head = SLIST_FIRST(&sc->sc_bslist);
473 		SLIST_REMOVE_HEAD(&sc->sc_bslist, dbs_next);
474 		free(head, M_TEMP);
475 	}
476 	sc->sc_bscount = 0;
477 #endif
478 
479 	/* Get rid of the shutdown hook. */
480 	if (sc->sc_sdhook != NULL)
481 		shutdownhook_disestablish(sc->sc_sdhook);
482 
483 #if NRND > 0
484 	/* Unhook the entropy source. */
485 	rnd_detach_source(&sc->rnd_source);
486 #endif
487 
488 	sc->drvp->drive_flags = 0; /* no drive any more here */
489 
490 	return (0);
491 }
492 
493 /*
494  * Read/write routine for a buffer.  Validates the arguments and schedules the
495  * transfer.  Does not wait for the transfer to complete.
496  */
497 void
498 wdstrategy(struct buf *bp)
499 {
500 	struct wd_softc *wd = device_lookup(&wd_cd, WDUNIT(bp->b_dev));
501 	struct disklabel *lp = wd->sc_dk.dk_label;
502 	daddr_t blkno;
503 	int s;
504 
505 	ATADEBUG_PRINT(("wdstrategy (%s)\n", wd->sc_dev.dv_xname),
506 	    DEBUG_XFERS);
507 
508 	/* Valid request?  */
509 	if (bp->b_blkno < 0 ||
510 	    (bp->b_bcount % lp->d_secsize) != 0 ||
511 	    (bp->b_bcount / lp->d_secsize) >= (1 << NBBY)) {
512 		bp->b_error = EINVAL;
513 		goto bad;
514 	}
515 
516 	/* If device invalidated (e.g. media change, door open), error. */
517 	if ((wd->sc_flags & WDF_LOADED) == 0) {
518 		bp->b_error = EIO;
519 		goto bad;
520 	}
521 
522 	/* If it's a null transfer, return immediately. */
523 	if (bp->b_bcount == 0)
524 		goto done;
525 
526 	/*
527 	 * Do bounds checking, adjust transfer. if error, process.
528 	 * If end of partition, just return.
529 	 */
530 	if (WDPART(bp->b_dev) == RAW_PART) {
531 		if (bounds_check_with_mediasize(bp, DEV_BSIZE,
532 		    wd->sc_capacity) <= 0)
533 			goto done;
534 	} else {
535 		if (bounds_check_with_label(&wd->sc_dk, bp,
536 		    (wd->sc_flags & (WDF_WLABEL|WDF_LABELLING)) != 0) <= 0)
537 			goto done;
538 	}
539 
540 	/*
541 	 * Now convert the block number to absolute and put it in
542 	 * terms of the device's logical block size.
543 	 */
544 	if (lp->d_secsize >= DEV_BSIZE)
545 		blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
546 	else
547 		blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
548 
549 	if (WDPART(bp->b_dev) != RAW_PART)
550 		blkno += lp->d_partitions[WDPART(bp->b_dev)].p_offset;
551 
552 	bp->b_rawblkno = blkno;
553 
554 #ifdef WD_SOFTBADSECT
555 	/*
556 	 * If the transfer about to be attempted contains only a block that
557 	 * is known to be bad then return an error for the transfer without
558 	 * even attempting to start a transfer up under the premis that we
559 	 * will just end up doing more retries for a transfer that will end
560 	 * up failing again.
561 	 * XXX:SMP - mutex required to protect with DIOCBSFLUSH
562 	 */
563 	if (__predict_false(!SLIST_EMPTY(&wd->sc_bslist))) {
564 		struct disk_badsectors *dbs;
565 		daddr_t maxblk = blkno + (bp->b_bcount >> DEV_BSHIFT) - 1;
566 
567 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next)
568 			if ((dbs->dbs_min <= blkno && blkno <= dbs->dbs_max) ||
569 			    (dbs->dbs_min <= maxblk && maxblk <= dbs->dbs_max)){
570 				bp->b_error = EIO;
571 				goto bad;
572 			}
573 	}
574 #endif
575 
576 	/* Queue transfer on drive, activate drive and controller if idle. */
577 	s = splbio();
578 	BUFQ_PUT(wd->sc_q, bp);
579 	wdstart(wd);
580 	splx(s);
581 	return;
582 bad:
583 	bp->b_flags |= B_ERROR;
584 done:
585 	/* Toss transfer; we're done early. */
586 	bp->b_resid = bp->b_bcount;
587 	biodone(bp);
588 }
589 
590 /*
591  * Queue a drive for I/O.
592  */
593 void
594 wdstart(void *arg)
595 {
596 	struct wd_softc *wd = arg;
597 	struct buf *bp = NULL;
598 
599 	ATADEBUG_PRINT(("wdstart %s\n", wd->sc_dev.dv_xname),
600 	    DEBUG_XFERS);
601 	while (wd->openings > 0) {
602 
603 		/* Is there a buf for us ? */
604 		if ((bp = BUFQ_GET(wd->sc_q)) == NULL)
605 			return;
606 
607 		/*
608 		 * Make the command. First lock the device
609 		 */
610 		wd->openings--;
611 
612 		wd->retries = 0;
613 		__wdstart(wd, bp);
614 	}
615 }
616 
617 static void
618 wd_split_mod15_write(struct buf *bp)
619 {
620 	struct buf *obp = bp->b_private;
621 	struct wd_softc *sc = wd_cd.cd_devs[DISKUNIT(obp->b_dev)];
622 
623 	if (__predict_false(bp->b_flags & B_ERROR) != 0) {
624 		/*
625 		 * Propagate the error.  If this was the first half of
626 		 * the original transfer, make sure to account for that
627 		 * in the residual.
628 		 */
629 		if (bp->b_data == obp->b_data)
630 			bp->b_resid += bp->b_bcount;
631 		goto done;
632 	}
633 
634 	/*
635 	 * If this was the second half of the transfer, we're all done!
636 	 */
637 	if (bp->b_data != obp->b_data)
638 		goto done;
639 
640 	/*
641 	 * Advance the pointer to the second half and issue that command
642 	 * using the same opening.
643 	 */
644 	bp->b_flags = obp->b_flags | B_CALL;
645 	bp->b_data += bp->b_bcount;
646 	bp->b_blkno += (bp->b_bcount / 512);
647 	bp->b_rawblkno += (bp->b_bcount / 512);
648 	__wdstart(sc, bp);
649 	return;
650 
651  done:
652 	obp->b_flags |= bp->b_flags & B_ERROR;
653 	obp->b_error = bp->b_error;
654 	obp->b_resid = bp->b_resid;
655 	putiobuf(bp);
656 	biodone(obp);
657 	sc->openings++;
658 	/* wddone() will call wdstart() */
659 }
660 
661 void
662 __wdstart(struct wd_softc *wd, struct buf *bp)
663 {
664 
665 	/*
666 	 * Deal with the "split mod15 write" quirk.  We just divide the
667 	 * transfer in two, doing the first half and then then second half
668 	 * with the same command opening.
669 	 *
670 	 * Note we MUST do this here, because we can't let insertion
671 	 * into the bufq cause the transfers to be re-merged.
672 	 */
673 	if (__predict_false((wd->sc_quirks & WD_QUIRK_SPLIT_MOD15_WRITE) != 0 &&
674 			    (bp->b_flags & B_READ) == 0 &&
675 			    bp->b_bcount > 512 &&
676 			    ((bp->b_bcount / 512) % 15) == 1)) {
677 		struct buf *nbp;
678 
679 		/* already at splbio */
680 		nbp = getiobuf_nowait();
681 		if (__predict_false(nbp == NULL)) {
682 			/* No memory -- fail the iop. */
683 			bp->b_error = ENOMEM;
684 			bp->b_flags |= B_ERROR;
685 			bp->b_resid = bp->b_bcount;
686 			biodone(bp);
687 			wd->openings++;
688 			return;
689 		}
690 
691 		nbp->b_error = 0;
692 		nbp->b_proc = bp->b_proc;
693 		nbp->b_vp = NULLVP;
694 		nbp->b_dev = bp->b_dev;
695 
696 		nbp->b_bcount = bp->b_bcount / 2;
697 		nbp->b_bufsize = bp->b_bcount / 2;
698 		nbp->b_data = bp->b_data;
699 
700 		nbp->b_blkno = bp->b_blkno;
701 		nbp->b_rawblkno = bp->b_rawblkno;
702 
703 		nbp->b_flags = bp->b_flags | B_CALL;
704 		nbp->b_iodone = wd_split_mod15_write;
705 
706 		/* Put ptr to orig buf in b_private and use new buf */
707 		nbp->b_private = bp;
708 
709 		BIO_COPYPRIO(nbp, bp);
710 
711 		bp = nbp;
712 	}
713 
714 	wd->sc_wdc_bio.blkno = bp->b_rawblkno;
715 	wd->sc_wdc_bio.blkdone =0;
716 	wd->sc_bp = bp;
717 	/*
718 	 * If we're retrying, retry in single-sector mode. This will give us
719 	 * the sector number of the problem, and will eventually allow the
720 	 * transfer to succeed.
721 	 */
722 	if (wd->retries >= WDIORETRIES_SINGLE)
723 		wd->sc_wdc_bio.flags = ATA_SINGLE;
724 	else
725 		wd->sc_wdc_bio.flags = 0;
726 	if (wd->sc_flags & WDF_LBA48 &&
727 	    (wd->sc_wdc_bio.blkno > LBA48_THRESHOLD ||
728 	    (wd->sc_quirks & WD_QUIRK_FORCE_LBA48) != 0))
729 		wd->sc_wdc_bio.flags |= ATA_LBA48;
730 	if (wd->sc_flags & WDF_LBA)
731 		wd->sc_wdc_bio.flags |= ATA_LBA;
732 	if (bp->b_flags & B_READ)
733 		wd->sc_wdc_bio.flags |= ATA_READ;
734 	wd->sc_wdc_bio.bcount = bp->b_bcount;
735 	wd->sc_wdc_bio.databuf = bp->b_data;
736 	/* Instrumentation. */
737 	disk_busy(&wd->sc_dk);
738 	switch (wd->atabus->ata_bio(wd->drvp, &wd->sc_wdc_bio)) {
739 	case ATACMD_TRY_AGAIN:
740 		callout_reset(&wd->sc_restart_ch, hz, wdrestart, wd);
741 		break;
742 	case ATACMD_QUEUED:
743 	case ATACMD_COMPLETE:
744 		break;
745 	default:
746 		panic("__wdstart: bad return code from ata_bio()");
747 	}
748 }
749 
750 void
751 wddone(void *v)
752 {
753 	struct wd_softc *wd = v;
754 	struct buf *bp = wd->sc_bp;
755 	const char *errmsg;
756 	int do_perror = 0;
757 	ATADEBUG_PRINT(("wddone %s\n", wd->sc_dev.dv_xname),
758 	    DEBUG_XFERS);
759 
760 	if (bp == NULL)
761 		return;
762 	bp->b_resid = wd->sc_wdc_bio.bcount;
763 	switch (wd->sc_wdc_bio.error) {
764 	case ERR_DMA:
765 		errmsg = "DMA error";
766 		goto retry;
767 	case ERR_DF:
768 		errmsg = "device fault";
769 		goto retry;
770 	case TIMEOUT:
771 		errmsg = "device timeout";
772 		goto retry;
773 	case ERR_RESET:
774 		errmsg = "channel reset";
775 		goto retry2;
776 	case ERROR:
777 		/* Don't care about media change bits */
778 		if (wd->sc_wdc_bio.r_error != 0 &&
779 		    (wd->sc_wdc_bio.r_error & ~(WDCE_MC | WDCE_MCR)) == 0)
780 			goto noerror;
781 		errmsg = "error";
782 		do_perror = 1;
783 retry:		/* Just reset and retry. Can we do more ? */
784 		(*wd->atabus->ata_reset_drive)(wd->drvp, AT_RST_NOCMD);
785 retry2:
786 		diskerr(bp, "wd", errmsg, LOG_PRINTF,
787 		    wd->sc_wdc_bio.blkdone, wd->sc_dk.dk_label);
788 		if (wd->retries < WDIORETRIES)
789 			printf(", retrying\n");
790 		if (do_perror)
791 			wdperror(wd);
792 		if (wd->retries < WDIORETRIES) {
793 			wd->retries++;
794 			callout_reset(&wd->sc_restart_ch, RECOVERYTIME,
795 			    wdrestart, wd);
796 			return;
797 		}
798 		printf("\n");
799 
800 #ifdef WD_SOFTBADSECT
801 		/*
802 		 * Not all errors indicate a failed block but those that do,
803 		 * put the block on the bad-block list for the device.  Only
804 		 * do this for reads because the drive should do it for writes,
805 		 * itself, according to Manuel.
806 		 */
807 		if ((bp->b_flags & B_READ) &&
808 		    ((wd->drvp->ata_vers >= 4 && wd->sc_wdc_bio.r_error & 64) ||
809 	     	     (wd->drvp->ata_vers < 4 && wd->sc_wdc_bio.r_error & 192))) {
810 			struct disk_badsectors *dbs;
811 
812 			dbs = malloc(sizeof *dbs, M_TEMP, M_WAITOK);
813 			dbs->dbs_min = bp->b_rawblkno;
814 			dbs->dbs_max = dbs->dbs_min + (bp->b_bcount >> DEV_BSHIFT) - 1;
815 			microtime(&dbs->dbs_failedat);
816 			SLIST_INSERT_HEAD(&wd->sc_bslist, dbs, dbs_next);
817 			wd->sc_bscount++;
818 		}
819 #endif
820 		bp->b_flags |= B_ERROR;
821 		bp->b_error = EIO;
822 		break;
823 	case NOERROR:
824 noerror:	if ((wd->sc_wdc_bio.flags & ATA_CORR) || wd->retries > 0)
825 			printf("%s: soft error (corrected)\n",
826 			    wd->sc_dev.dv_xname);
827 		break;
828 	case ERR_NODEV:
829 		bp->b_flags |= B_ERROR;
830 		bp->b_error = EIO;
831 		break;
832 	}
833 	disk_unbusy(&wd->sc_dk, (bp->b_bcount - bp->b_resid),
834 	    (bp->b_flags & B_READ));
835 #if NRND > 0
836 	rnd_add_uint32(&wd->rnd_source, bp->b_blkno);
837 #endif
838 	/* XXX Yuck, but we don't want to increment openings in this case */
839 	if (__predict_false((bp->b_flags & B_CALL) != 0 &&
840 			    bp->b_iodone == wd_split_mod15_write))
841 		biodone(bp);
842 	else {
843 		biodone(bp);
844 		wd->openings++;
845 	}
846 	wdstart(wd);
847 }
848 
849 void
850 wdrestart(void *v)
851 {
852 	struct wd_softc *wd = v;
853 	struct buf *bp = wd->sc_bp;
854 	int s;
855 	ATADEBUG_PRINT(("wdrestart %s\n", wd->sc_dev.dv_xname),
856 	    DEBUG_XFERS);
857 
858 	s = splbio();
859 	__wdstart(v, bp);
860 	splx(s);
861 }
862 
863 int
864 wdread(dev_t dev, struct uio *uio, int flags)
865 {
866 
867 	ATADEBUG_PRINT(("wdread\n"), DEBUG_XFERS);
868 	return (physio(wdstrategy, NULL, dev, B_READ, minphys, uio));
869 }
870 
871 int
872 wdwrite(dev_t dev, struct uio *uio, int flags)
873 {
874 
875 	ATADEBUG_PRINT(("wdwrite\n"), DEBUG_XFERS);
876 	return (physio(wdstrategy, NULL, dev, B_WRITE, minphys, uio));
877 }
878 
879 int
880 wdopen(dev_t dev, int flag, int fmt, struct lwp *l)
881 {
882 	struct wd_softc *wd;
883 	int part, error;
884 
885 	ATADEBUG_PRINT(("wdopen\n"), DEBUG_FUNCS);
886 	wd = device_lookup(&wd_cd, WDUNIT(dev));
887 	if (wd == NULL)
888 		return (ENXIO);
889 
890 	if (! device_is_active(&wd->sc_dev))
891 		return (ENODEV);
892 
893 	part = WDPART(dev);
894 
895 	if ((error = lockmgr(&wd->sc_dk.dk_openlock, LK_EXCLUSIVE, NULL)) != 0)
896 		return (error);
897 
898 	/*
899 	 * If there are wedges, and this is not RAW_PART, then we
900 	 * need to fail.
901 	 */
902 	if (wd->sc_dk.dk_nwedges != 0 && part != RAW_PART) {
903 		error = EBUSY;
904 		goto bad1;
905 	}
906 
907 	/*
908 	 * If this is the first open of this device, add a reference
909 	 * to the adapter.
910 	 */
911 	if (wd->sc_dk.dk_openmask == 0 &&
912 	    (error = wd->atabus->ata_addref(wd->drvp)) != 0)
913 		goto bad1;
914 
915 	if (wd->sc_dk.dk_openmask != 0) {
916 		/*
917 		 * If any partition is open, but the disk has been invalidated,
918 		 * disallow further opens.
919 		 */
920 		if ((wd->sc_flags & WDF_LOADED) == 0) {
921 			error = EIO;
922 			goto bad2;
923 		}
924 	} else {
925 		if ((wd->sc_flags & WDF_LOADED) == 0) {
926 			wd->sc_flags |= WDF_LOADED;
927 
928 			/* Load the physical device parameters. */
929 			wd_get_params(wd, AT_WAIT, &wd->sc_params);
930 
931 			/* Load the partition info if not already loaded. */
932 			wdgetdisklabel(wd);
933 		}
934 	}
935 
936 	/* Check that the partition exists. */
937 	if (part != RAW_PART &&
938 	    (part >= wd->sc_dk.dk_label->d_npartitions ||
939 	     wd->sc_dk.dk_label->d_partitions[part].p_fstype == FS_UNUSED)) {
940 		error = ENXIO;
941 		goto bad2;
942 	}
943 
944 	/* Insure only one open at a time. */
945 	switch (fmt) {
946 	case S_IFCHR:
947 		wd->sc_dk.dk_copenmask |= (1 << part);
948 		break;
949 	case S_IFBLK:
950 		wd->sc_dk.dk_bopenmask |= (1 << part);
951 		break;
952 	}
953 	wd->sc_dk.dk_openmask =
954 	    wd->sc_dk.dk_copenmask | wd->sc_dk.dk_bopenmask;
955 
956 	(void) lockmgr(&wd->sc_dk.dk_openlock, LK_RELEASE, NULL);
957 	return 0;
958 
959  bad2:
960 	if (wd->sc_dk.dk_openmask == 0)
961 		wd->atabus->ata_delref(wd->drvp);
962  bad1:
963 	(void) lockmgr(&wd->sc_dk.dk_openlock, LK_RELEASE, NULL);
964 	return error;
965 }
966 
967 int
968 wdclose(dev_t dev, int flag, int fmt, struct lwp *l)
969 {
970 	struct wd_softc *wd = device_lookup(&wd_cd, WDUNIT(dev));
971 	int part = WDPART(dev);
972 	int error;
973 
974 	ATADEBUG_PRINT(("wdclose\n"), DEBUG_FUNCS);
975 
976 	if ((error = lockmgr(&wd->sc_dk.dk_openlock, LK_EXCLUSIVE, NULL)) != 0)
977 		return error;
978 
979 	switch (fmt) {
980 	case S_IFCHR:
981 		wd->sc_dk.dk_copenmask &= ~(1 << part);
982 		break;
983 	case S_IFBLK:
984 		wd->sc_dk.dk_bopenmask &= ~(1 << part);
985 		break;
986 	}
987 	wd->sc_dk.dk_openmask =
988 	    wd->sc_dk.dk_copenmask | wd->sc_dk.dk_bopenmask;
989 
990 	if (wd->sc_dk.dk_openmask == 0) {
991 		wd_flushcache(wd, AT_WAIT);
992 
993 		if (! (wd->sc_flags & WDF_KLABEL))
994 			wd->sc_flags &= ~WDF_LOADED;
995 
996 		wd->atabus->ata_delref(wd->drvp);
997 	}
998 
999 	(void) lockmgr(&wd->sc_dk.dk_openlock, LK_RELEASE, NULL);
1000 	return 0;
1001 }
1002 
1003 void
1004 wdgetdefaultlabel(struct wd_softc *wd, struct disklabel *lp)
1005 {
1006 
1007 	ATADEBUG_PRINT(("wdgetdefaultlabel\n"), DEBUG_FUNCS);
1008 	memset(lp, 0, sizeof(struct disklabel));
1009 
1010 	lp->d_secsize = DEV_BSIZE;
1011 	lp->d_ntracks = wd->sc_params.atap_heads;
1012 	lp->d_nsectors = wd->sc_params.atap_sectors;
1013 	lp->d_ncylinders = (wd->sc_flags & WDF_LBA) ? wd->sc_capacity /
1014 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors) :
1015 		wd->sc_params.atap_cylinders;
1016 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1017 
1018 	if (strcmp(wd->sc_params.atap_model, "ST506") == 0)
1019 		lp->d_type = DTYPE_ST506;
1020 	else
1021 		lp->d_type = DTYPE_ESDI;
1022 
1023 	strncpy(lp->d_typename, wd->sc_params.atap_model, 16);
1024 	strncpy(lp->d_packname, "fictitious", 16);
1025 	if (wd->sc_capacity > UINT32_MAX)
1026 		lp->d_secperunit = UINT32_MAX;
1027 	else
1028 		lp->d_secperunit = wd->sc_capacity;
1029 	lp->d_rpm = 3600;
1030 	lp->d_interleave = 1;
1031 	lp->d_flags = 0;
1032 
1033 	lp->d_partitions[RAW_PART].p_offset = 0;
1034 	lp->d_partitions[RAW_PART].p_size =
1035 	    lp->d_secperunit * (lp->d_secsize / DEV_BSIZE);
1036 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1037 	lp->d_npartitions = RAW_PART + 1;
1038 
1039 	lp->d_magic = DISKMAGIC;
1040 	lp->d_magic2 = DISKMAGIC;
1041 	lp->d_checksum = dkcksum(lp);
1042 }
1043 
1044 /*
1045  * Fabricate a default disk label, and try to read the correct one.
1046  */
1047 void
1048 wdgetdisklabel(struct wd_softc *wd)
1049 {
1050 	struct disklabel *lp = wd->sc_dk.dk_label;
1051 	const char *errstring;
1052 	int s;
1053 
1054 	ATADEBUG_PRINT(("wdgetdisklabel\n"), DEBUG_FUNCS);
1055 
1056 	memset(wd->sc_dk.dk_cpulabel, 0, sizeof(struct cpu_disklabel));
1057 
1058 	wdgetdefaultlabel(wd, lp);
1059 
1060 	wd->sc_badsect[0] = -1;
1061 
1062 	if (wd->drvp->state > RESET) {
1063 		s = splbio();
1064 		wd->drvp->drive_flags |= DRIVE_RESET;
1065 		splx(s);
1066 	}
1067 	errstring = readdisklabel(MAKEWDDEV(0, device_unit(&wd->sc_dev),
1068 				  RAW_PART), wdstrategy, lp,
1069 				  wd->sc_dk.dk_cpulabel);
1070 	if (errstring) {
1071 		/*
1072 		 * This probably happened because the drive's default
1073 		 * geometry doesn't match the DOS geometry.  We
1074 		 * assume the DOS geometry is now in the label and try
1075 		 * again.  XXX This is a kluge.
1076 		 */
1077 		if (wd->drvp->state > RESET) {
1078 			s = splbio();
1079 			wd->drvp->drive_flags |= DRIVE_RESET;
1080 			splx(s);
1081 		}
1082 		errstring = readdisklabel(MAKEWDDEV(0, device_unit(&wd->sc_dev),
1083 		    RAW_PART), wdstrategy, lp, wd->sc_dk.dk_cpulabel);
1084 	}
1085 	if (errstring) {
1086 		printf("%s: %s\n", wd->sc_dev.dv_xname, errstring);
1087 		return;
1088 	}
1089 
1090 	if (wd->drvp->state > RESET) {
1091 		s = splbio();
1092 		wd->drvp->drive_flags |= DRIVE_RESET;
1093 		splx(s);
1094 	}
1095 #ifdef HAS_BAD144_HANDLING
1096 	if ((lp->d_flags & D_BADSECT) != 0)
1097 		bad144intern(wd);
1098 #endif
1099 }
1100 
1101 void
1102 wdperror(const struct wd_softc *wd)
1103 {
1104 	static const char *const errstr0_3[] = {"address mark not found",
1105 	    "track 0 not found", "aborted command", "media change requested",
1106 	    "id not found", "media changed", "uncorrectable data error",
1107 	    "bad block detected"};
1108 	static const char *const errstr4_5[] = {
1109 	    "obsolete (address mark not found)",
1110 	    "no media/write protected", "aborted command",
1111 	    "media change requested", "id not found", "media changed",
1112 	    "uncorrectable data error", "interface CRC error"};
1113 	const char *const *errstr;
1114 	int i;
1115 	const char *sep = "";
1116 
1117 	const char *devname = wd->sc_dev.dv_xname;
1118 	struct ata_drive_datas *drvp = wd->drvp;
1119 	int errno = wd->sc_wdc_bio.r_error;
1120 
1121 	if (drvp->ata_vers >= 4)
1122 		errstr = errstr4_5;
1123 	else
1124 		errstr = errstr0_3;
1125 
1126 	printf("%s: (", devname);
1127 
1128 	if (errno == 0)
1129 		printf("error not notified");
1130 
1131 	for (i = 0; i < 8; i++) {
1132 		if (errno & (1 << i)) {
1133 			printf("%s%s", sep, errstr[i]);
1134 			sep = ", ";
1135 		}
1136 	}
1137 	printf(")\n");
1138 }
1139 
1140 int
1141 wdioctl(dev_t dev, u_long xfer, caddr_t addr, int flag, struct lwp *l)
1142 {
1143 	struct wd_softc *wd = device_lookup(&wd_cd, WDUNIT(dev));
1144 	int error = 0, s;
1145 #ifdef __HAVE_OLD_DISKLABEL
1146 	struct disklabel *newlabel = NULL;
1147 #endif
1148 
1149 	ATADEBUG_PRINT(("wdioctl\n"), DEBUG_FUNCS);
1150 
1151 	if ((wd->sc_flags & WDF_LOADED) == 0)
1152 		return EIO;
1153 
1154 	switch (xfer) {
1155 #ifdef HAS_BAD144_HANDLING
1156 	case DIOCSBAD:
1157 		if ((flag & FWRITE) == 0)
1158 			return EBADF;
1159 		wd->sc_dk.dk_cpulabel->bad = *(struct dkbad *)addr;
1160 		wd->sc_dk.dk_label->d_flags |= D_BADSECT;
1161 		bad144intern(wd);
1162 		return 0;
1163 #endif
1164 #ifdef WD_SOFTBADSECT
1165 	case DIOCBSLIST :
1166 	{
1167 		u_int32_t count, missing, skip;
1168 		struct disk_badsecinfo dbsi;
1169 		struct disk_badsectors *dbs;
1170 		size_t available;
1171 		caddr_t laddr;
1172 
1173 		dbsi = *(struct disk_badsecinfo *)addr;
1174 		missing = wd->sc_bscount;
1175 		count = 0;
1176 		available = dbsi.dbsi_bufsize;
1177 		skip = dbsi.dbsi_skip;
1178 		laddr = dbsi.dbsi_buffer;
1179 
1180 		/*
1181 		 * We start this loop with the expectation that all of the
1182 		 * entries will be missed and decrement this counter each
1183 		 * time we either skip over one (already copied out) or
1184 		 * we actually copy it back to user space.  The structs
1185 		 * holding the bad sector information are copied directly
1186 		 * back to user space whilst the summary is returned via
1187 		 * the struct passed in via the ioctl.
1188 		 */
1189 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next) {
1190 			if (skip > 0) {
1191 				missing--;
1192 				skip--;
1193 				continue;
1194 			}
1195 			if (available < sizeof(*dbs))
1196 				break;
1197 			available -= sizeof(*dbs);
1198 			copyout(dbs, laddr, sizeof(*dbs));
1199 			laddr += sizeof(*dbs);
1200 			missing--;
1201 			count++;
1202 		}
1203 		dbsi.dbsi_left = missing;
1204 		dbsi.dbsi_copied = count;
1205 		*(struct disk_badsecinfo *)addr = dbsi;
1206 		return 0;
1207 	}
1208 
1209 	case DIOCBSFLUSH :
1210 		/* Clean out the bad sector list */
1211 		while (!SLIST_EMPTY(&wd->sc_bslist)) {
1212 			void *head = SLIST_FIRST(&wd->sc_bslist);
1213 			SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
1214 			free(head, M_TEMP);
1215 		}
1216 		wd->sc_bscount = 0;
1217 		return 0;
1218 #endif
1219 	case DIOCGDINFO:
1220 		*(struct disklabel *)addr = *(wd->sc_dk.dk_label);
1221 		return 0;
1222 #ifdef __HAVE_OLD_DISKLABEL
1223 	case ODIOCGDINFO:
1224 		newlabel = malloc(sizeof *newlabel, M_TEMP, M_WAITOK);
1225 		if (newlabel == NULL)
1226 			return EIO;
1227 		*newlabel = *(wd->sc_dk.dk_label);
1228 		if (newlabel->d_npartitions <= OLDMAXPARTITIONS)
1229 			memcpy(addr, newlabel, sizeof (struct olddisklabel));
1230 		else
1231 			error = ENOTTY;
1232 		free(newlabel, M_TEMP);
1233 		return error;
1234 #endif
1235 
1236 	case DIOCGPART:
1237 		((struct partinfo *)addr)->disklab = wd->sc_dk.dk_label;
1238 		((struct partinfo *)addr)->part =
1239 		    &wd->sc_dk.dk_label->d_partitions[WDPART(dev)];
1240 		return 0;
1241 
1242 	case DIOCWDINFO:
1243 	case DIOCSDINFO:
1244 #ifdef __HAVE_OLD_DISKLABEL
1245 	case ODIOCWDINFO:
1246 	case ODIOCSDINFO:
1247 #endif
1248 	{
1249 		struct disklabel *lp;
1250 
1251 		if ((flag & FWRITE) == 0)
1252 			return EBADF;
1253 
1254 #ifdef __HAVE_OLD_DISKLABEL
1255 		if (xfer == ODIOCSDINFO || xfer == ODIOCWDINFO) {
1256 			newlabel = malloc(sizeof *newlabel, M_TEMP, M_WAITOK);
1257 			if (newlabel == NULL)
1258 				return EIO;
1259 			memset(newlabel, 0, sizeof newlabel);
1260 			memcpy(newlabel, addr, sizeof (struct olddisklabel));
1261 			lp = newlabel;
1262 		} else
1263 #endif
1264 		lp = (struct disklabel *)addr;
1265 
1266 		if ((error = lockmgr(&wd->sc_dk.dk_openlock, LK_EXCLUSIVE,
1267 				     NULL)) != 0)
1268 			goto bad;
1269 		wd->sc_flags |= WDF_LABELLING;
1270 
1271 		error = setdisklabel(wd->sc_dk.dk_label,
1272 		    lp, /*wd->sc_dk.dk_openmask : */0,
1273 		    wd->sc_dk.dk_cpulabel);
1274 		if (error == 0) {
1275 			if (wd->drvp->state > RESET) {
1276 				s = splbio();
1277 				wd->drvp->drive_flags |= DRIVE_RESET;
1278 				splx(s);
1279 			}
1280 			if (xfer == DIOCWDINFO
1281 #ifdef __HAVE_OLD_DISKLABEL
1282 			    || xfer == ODIOCWDINFO
1283 #endif
1284 			    )
1285 				error = writedisklabel(WDLABELDEV(dev),
1286 				    wdstrategy, wd->sc_dk.dk_label,
1287 				    wd->sc_dk.dk_cpulabel);
1288 		}
1289 
1290 		wd->sc_flags &= ~WDF_LABELLING;
1291 		(void) lockmgr(&wd->sc_dk.dk_openlock, LK_RELEASE, NULL);
1292 bad:
1293 #ifdef __HAVE_OLD_DISKLABEL
1294 		if (newlabel != NULL)
1295 			free(newlabel, M_TEMP);
1296 #endif
1297 		return error;
1298 	}
1299 
1300 	case DIOCKLABEL:
1301 		if (*(int *)addr)
1302 			wd->sc_flags |= WDF_KLABEL;
1303 		else
1304 			wd->sc_flags &= ~WDF_KLABEL;
1305 		return 0;
1306 
1307 	case DIOCWLABEL:
1308 		if ((flag & FWRITE) == 0)
1309 			return EBADF;
1310 		if (*(int *)addr)
1311 			wd->sc_flags |= WDF_WLABEL;
1312 		else
1313 			wd->sc_flags &= ~WDF_WLABEL;
1314 		return 0;
1315 
1316 	case DIOCGDEFLABEL:
1317 		wdgetdefaultlabel(wd, (struct disklabel *)addr);
1318 		return 0;
1319 #ifdef __HAVE_OLD_DISKLABEL
1320 	case ODIOCGDEFLABEL:
1321 		newlabel = malloc(sizeof *newlabel, M_TEMP, M_WAITOK);
1322 		if (newlabel == NULL)
1323 			return EIO;
1324 		wdgetdefaultlabel(wd, newlabel);
1325 		if (newlabel->d_npartitions <= OLDMAXPARTITIONS)
1326 			memcpy(addr, &newlabel, sizeof (struct olddisklabel));
1327 		else
1328 			error = ENOTTY;
1329 		free(newlabel, M_TEMP);
1330 		return error;
1331 #endif
1332 
1333 #ifdef notyet
1334 	case DIOCWFORMAT:
1335 		if ((flag & FWRITE) == 0)
1336 			return EBADF;
1337 		{
1338 		register struct format_op *fop;
1339 		struct iovec aiov;
1340 		struct uio auio;
1341 
1342 		fop = (struct format_op *)addr;
1343 		aiov.iov_base = fop->df_buf;
1344 		aiov.iov_len = fop->df_count;
1345 		auio.uio_iov = &aiov;
1346 		auio.uio_iovcnt = 1;
1347 		auio.uio_resid = fop->df_count;
1348 		auio.uio_offset =
1349 			fop->df_startblk * wd->sc_dk.dk_label->d_secsize;
1350 		auio.uio_vmspace = l->l_proc->p_vmspace;
1351 		error = physio(wdformat, NULL, dev, B_WRITE, minphys,
1352 		    &auio);
1353 		fop->df_count -= auio.uio_resid;
1354 		fop->df_reg[0] = wdc->sc_status;
1355 		fop->df_reg[1] = wdc->sc_error;
1356 		return error;
1357 		}
1358 #endif
1359 	case DIOCGCACHE:
1360 		return wd_getcache(wd, (int *)addr);
1361 
1362 	case DIOCSCACHE:
1363 		return wd_setcache(wd, *(int *)addr);
1364 
1365 	case DIOCCACHESYNC:
1366 		return wd_flushcache(wd, AT_WAIT);
1367 
1368 	case ATAIOCCOMMAND:
1369 		/*
1370 		 * Make sure this command is (relatively) safe first
1371 		 */
1372 		if ((((atareq_t *) addr)->flags & ATACMD_READ) == 0 &&
1373 		    (flag & FWRITE) == 0)
1374 			return (EBADF);
1375 		{
1376 		struct wd_ioctl *wi;
1377 		atareq_t *atareq = (atareq_t *) addr;
1378 		int error1;
1379 
1380 		wi = wi_get();
1381 		wi->wi_softc = wd;
1382 		wi->wi_atareq = *atareq;
1383 
1384 		if (atareq->datalen && atareq->flags &
1385 		    (ATACMD_READ | ATACMD_WRITE)) {
1386 			wi->wi_iov.iov_base = atareq->databuf;
1387 			wi->wi_iov.iov_len = atareq->datalen;
1388 			wi->wi_uio.uio_iov = &wi->wi_iov;
1389 			wi->wi_uio.uio_iovcnt = 1;
1390 			wi->wi_uio.uio_resid = atareq->datalen;
1391 			wi->wi_uio.uio_offset = 0;
1392 			wi->wi_uio.uio_rw =
1393 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE;
1394 			wi->wi_uio.uio_vmspace = l->l_proc->p_vmspace;
1395 			error1 = physio(wdioctlstrategy, &wi->wi_bp, dev,
1396 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE,
1397 			    minphys, &wi->wi_uio);
1398 		} else {
1399 			/* No need to call physio if we don't have any
1400 			   user data */
1401 			wi->wi_bp.b_flags = 0;
1402 			wi->wi_bp.b_data = 0;
1403 			wi->wi_bp.b_bcount = 0;
1404 			wi->wi_bp.b_dev = 0;
1405 			wi->wi_bp.b_proc = l->l_proc;
1406 			wdioctlstrategy(&wi->wi_bp);
1407 			error1 = wi->wi_bp.b_error;
1408 		}
1409 		*atareq = wi->wi_atareq;
1410 		wi_free(wi);
1411 		return(error1);
1412 		}
1413 
1414 	case DIOCAWEDGE:
1415 	    {
1416 	    	struct dkwedge_info *dkw = (void *) addr;
1417 
1418 		if ((flag & FWRITE) == 0)
1419 			return (EBADF);
1420 
1421 		/* If the ioctl happens here, the parent is us. */
1422 		strcpy(dkw->dkw_parent, wd->sc_dev.dv_xname);
1423 		return (dkwedge_add(dkw));
1424 	    }
1425 
1426 	case DIOCDWEDGE:
1427 	    {
1428 	    	struct dkwedge_info *dkw = (void *) addr;
1429 
1430 		if ((flag & FWRITE) == 0)
1431 			return (EBADF);
1432 
1433 		/* If the ioctl happens here, the parent is us. */
1434 		strcpy(dkw->dkw_parent, wd->sc_dev.dv_xname);
1435 		return (dkwedge_del(dkw));
1436 	    }
1437 
1438 	case DIOCLWEDGES:
1439 	    {
1440 	    	struct dkwedge_list *dkwl = (void *) addr;
1441 
1442 		return (dkwedge_list(&wd->sc_dk, dkwl, l));
1443 	    }
1444 
1445 	case DIOCGSTRATEGY:
1446 	    {
1447 		struct disk_strategy *dks = (void *)addr;
1448 
1449 		s = splbio();
1450 		strlcpy(dks->dks_name, bufq_getstrategyname(wd->sc_q),
1451 		    sizeof(dks->dks_name));
1452 		splx(s);
1453 		dks->dks_paramlen = 0;
1454 
1455 		return 0;
1456 	    }
1457 
1458 	case DIOCSSTRATEGY:
1459 	    {
1460 		struct disk_strategy *dks = (void *)addr;
1461 		struct bufq_state *new;
1462 		struct bufq_state *old;
1463 
1464 		if ((flag & FWRITE) == 0) {
1465 			return EBADF;
1466 		}
1467 		if (dks->dks_param != NULL) {
1468 			return EINVAL;
1469 		}
1470 		dks->dks_name[sizeof(dks->dks_name) - 1] = 0; /* ensure term */
1471 		error = bufq_alloc(&new, dks->dks_name,
1472 		    BUFQ_EXACT|BUFQ_SORT_RAWBLOCK);
1473 		if (error) {
1474 			return error;
1475 		}
1476 		s = splbio();
1477 		old = wd->sc_q;
1478 		bufq_move(new, old);
1479 		wd->sc_q = new;
1480 		splx(s);
1481 		bufq_free(old);
1482 
1483 		return 0;
1484 	    }
1485 
1486 	default:
1487 		return ENOTTY;
1488 	}
1489 
1490 #ifdef DIAGNOSTIC
1491 	panic("wdioctl: impossible");
1492 #endif
1493 }
1494 
#ifdef B_FORMAT
/*
 * Mark the buffer with B_FORMAT and pass it on to wdstrategy().
 * Only compiled when B_FORMAT is defined.
 */
int
wdformat(struct buf *bp)
{

	bp->b_flags = bp->b_flags | B_FORMAT;
	return wdstrategy(bp);
}
#endif
1504 
1505 int
1506 wdsize(dev_t dev)
1507 {
1508 	struct wd_softc *wd;
1509 	int part, omask;
1510 	int size;
1511 
1512 	ATADEBUG_PRINT(("wdsize\n"), DEBUG_FUNCS);
1513 
1514 	wd = device_lookup(&wd_cd, WDUNIT(dev));
1515 	if (wd == NULL)
1516 		return (-1);
1517 
1518 	part = WDPART(dev);
1519 	omask = wd->sc_dk.dk_openmask & (1 << part);
1520 
1521 	if (omask == 0 && wdopen(dev, 0, S_IFBLK, NULL) != 0)
1522 		return (-1);
1523 	if (wd->sc_dk.dk_label->d_partitions[part].p_fstype != FS_SWAP)
1524 		size = -1;
1525 	else
1526 		size = wd->sc_dk.dk_label->d_partitions[part].p_size *
1527 		    (wd->sc_dk.dk_label->d_secsize / DEV_BSIZE);
1528 	if (omask == 0 && wdclose(dev, 0, S_IFBLK, NULL) != 0)
1529 		return (-1);
1530 	return (size);
1531 }
1532 
1533 /* #define WD_DUMP_NOT_TRUSTED if you just want to watch */
1534 static int wddoingadump = 0;
1535 static int wddumprecalibrated = 0;
1536 
1537 /*
1538  * Dump core after a system crash.
1539  */
1540 int
1541 wddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
1542 {
1543 	struct wd_softc *wd;	/* disk unit to do the I/O */
1544 	struct disklabel *lp;   /* disk's disklabel */
1545 	int part, err;
1546 	int nblks;	/* total number of sectors left to write */
1547 
1548 	/* Check if recursive dump; if so, punt. */
1549 	if (wddoingadump)
1550 		return EFAULT;
1551 	wddoingadump = 1;
1552 
1553 	wd = device_lookup(&wd_cd, WDUNIT(dev));
1554 	if (wd == NULL)
1555 		return (ENXIO);
1556 
1557 	part = WDPART(dev);
1558 
1559 	/* Convert to disk sectors.  Request must be a multiple of size. */
1560 	lp = wd->sc_dk.dk_label;
1561 	if ((size % lp->d_secsize) != 0)
1562 		return EFAULT;
1563 	nblks = size / lp->d_secsize;
1564 	blkno = blkno / (lp->d_secsize / DEV_BSIZE);
1565 
1566 	/* Check transfer bounds against partition size. */
1567 	if ((blkno < 0) || ((blkno + nblks) > lp->d_partitions[part].p_size))
1568 		return EINVAL;
1569 
1570 	/* Offset block number to start of partition. */
1571 	blkno += lp->d_partitions[part].p_offset;
1572 
1573 	/* Recalibrate, if first dump transfer. */
1574 	if (wddumprecalibrated == 0) {
1575 		wddumprecalibrated = 1;
1576 		(*wd->atabus->ata_reset_drive)(wd->drvp,
1577 					       AT_POLL | AT_RST_EMERG);
1578 		wd->drvp->state = RESET;
1579 	}
1580 
1581 	wd->sc_bp = NULL;
1582 	wd->sc_wdc_bio.blkno = blkno;
1583 	wd->sc_wdc_bio.flags = ATA_POLL;
1584 	if (wd->sc_flags & WDF_LBA48 &&
1585 	    (blkno > LBA48_THRESHOLD ||
1586     	    (wd->sc_quirks & WD_QUIRK_FORCE_LBA48) != 0))
1587 		wd->sc_wdc_bio.flags |= ATA_LBA48;
1588 	if (wd->sc_flags & WDF_LBA)
1589 		wd->sc_wdc_bio.flags |= ATA_LBA;
1590 	wd->sc_wdc_bio.bcount = nblks * lp->d_secsize;
1591 	wd->sc_wdc_bio.databuf = va;
1592 #ifndef WD_DUMP_NOT_TRUSTED
1593 	switch (wd->atabus->ata_bio(wd->drvp, &wd->sc_wdc_bio)) {
1594 	case ATACMD_TRY_AGAIN:
1595 		panic("wddump: try again");
1596 		break;
1597 	case ATACMD_QUEUED:
1598 		panic("wddump: polled command has been queued");
1599 		break;
1600 	case ATACMD_COMPLETE:
1601 		break;
1602 	}
1603 	switch(wd->sc_wdc_bio.error) {
1604 	case TIMEOUT:
1605 		printf("wddump: device timed out");
1606 		err = EIO;
1607 		break;
1608 	case ERR_DF:
1609 		printf("wddump: drive fault");
1610 		err = EIO;
1611 		break;
1612 	case ERR_DMA:
1613 		printf("wddump: DMA error");
1614 		err = EIO;
1615 		break;
1616 	case ERROR:
1617 		printf("wddump: ");
1618 		wdperror(wd);
1619 		err = EIO;
1620 		break;
1621 	case NOERROR:
1622 		err = 0;
1623 		break;
1624 	default:
1625 		panic("wddump: unknown error type");
1626 	}
1627 	if (err != 0) {
1628 		printf("\n");
1629 		return err;
1630 	}
1631 #else	/* WD_DUMP_NOT_TRUSTED */
1632 	/* Let's just talk about this first... */
1633 	printf("wd%d: dump addr 0x%x, cylin %d, head %d, sector %d\n",
1634 	    unit, va, cylin, head, sector);
1635 	delay(500 * 1000);	/* half a second */
1636 #endif
1637 
1638 	wddoingadump = 0;
1639 	return 0;
1640 }
1641 
#ifdef HAS_BAD144_HANDLING
/*
 * Internalize the bad sector table.
 *
 * Each entry of the cpulabel's bad-sector table is converted from
 * cylinder / track / sector form into an absolute sector number using
 * the label geometry and stored in wd->sc_badsect[].  Conversion stops
 * at NBT_BAD entries or at the first entry whose cylinder is 0xffff;
 * the rest of sc_badsect[], up to index NBT_BAD, is filled with -1.
 */
void
bad144intern(struct wd_softc *wd)
{
	struct dkbad *bt = &wd->sc_dk.dk_cpulabel->bad;
	struct disklabel *lp = wd->sc_dk.dk_label;
	int n;

	ATADEBUG_PRINT(("bad144intern\n"), DEBUG_XFERS);

	for (n = 0; n < NBT_BAD && bt->bt_bad[n].bt_cyl != 0xffff; n++) {
		/* bt_trksec: high byte is the track, low byte the sector. */
		wd->sc_badsect[n] =
		    bt->bt_bad[n].bt_cyl * lp->d_secpercyl +
		    (bt->bt_bad[n].bt_trksec >> 8) * lp->d_nsectors +
		    (bt->bt_bad[n].bt_trksec & 0xff);
	}

	/* Mark every remaining slot, including the last one, as unused. */
	while (n < NBT_BAD+1) {
		wd->sc_badsect[n] = -1;
		n++;
	}
}
#endif
1667 
1668 int
1669 wd_get_params(struct wd_softc *wd, u_int8_t flags, struct ataparams *params)
1670 {
1671 	switch (wd->atabus->ata_get_params(wd->drvp, flags, params)) {
1672 	case CMD_AGAIN:
1673 		return 1;
1674 	case CMD_ERR:
1675 		/*
1676 		 * We `know' there's a drive here; just assume it's old.
1677 		 * This geometry is only used to read the MBR and print a
1678 		 * (false) attach message.
1679 		 */
1680 		strncpy(params->atap_model, "ST506",
1681 		    sizeof params->atap_model);
1682 		params->atap_config = ATA_CFG_FIXED;
1683 		params->atap_cylinders = 1024;
1684 		params->atap_heads = 8;
1685 		params->atap_sectors = 17;
1686 		params->atap_multi = 1;
1687 		params->atap_capabilities1 = params->atap_capabilities2 = 0;
1688 		wd->drvp->ata_vers = -1; /* Mark it as pre-ATA */
1689 		return 0;
1690 	case CMD_OK:
1691 		return 0;
1692 	default:
1693 		panic("wd_get_params: bad return code from ata_get_params");
1694 		/* NOTREACHED */
1695 	}
1696 }
1697 
1698 int
1699 wd_getcache(struct wd_softc *wd, int *bitsp)
1700 {
1701 	struct ataparams params;
1702 
1703 	if (wd_get_params(wd, AT_WAIT, &params) != 0)
1704 		return EIO;
1705 	if (params.atap_cmd_set1 == 0x0000 ||
1706 	    params.atap_cmd_set1 == 0xffff ||
1707 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0) {
1708 		*bitsp = 0;
1709 		return 0;
1710 	}
1711 	*bitsp = DKCACHE_WCHANGE | DKCACHE_READ;
1712 	if (params.atap_cmd1_en & WDC_CMD1_CACHE)
1713 		*bitsp |= DKCACHE_WRITE;
1714 
1715 	return 0;
1716 }
1717 
1718 const char at_errbits[] = "\20\10ERROR\11TIMEOU\12DF";
1719 
1720 int
1721 wd_setcache(struct wd_softc *wd, int bits)
1722 {
1723 	struct ataparams params;
1724 	struct ata_command ata_c;
1725 
1726 	if (wd_get_params(wd, AT_WAIT, &params) != 0)
1727 		return EIO;
1728 
1729 	if (params.atap_cmd_set1 == 0x0000 ||
1730 	    params.atap_cmd_set1 == 0xffff ||
1731 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0)
1732 		return EOPNOTSUPP;
1733 
1734 	if ((bits & DKCACHE_READ) == 0 ||
1735 	    (bits & DKCACHE_SAVE) != 0)
1736 		return EOPNOTSUPP;
1737 
1738 	memset(&ata_c, 0, sizeof(struct ata_command));
1739 	ata_c.r_command = SET_FEATURES;
1740 	ata_c.r_st_bmask = 0;
1741 	ata_c.r_st_pmask = 0;
1742 	ata_c.timeout = 30000; /* 30s timeout */
1743 	ata_c.flags = AT_WAIT;
1744 	if (bits & DKCACHE_WRITE)
1745 		ata_c.r_features = WDSF_WRITE_CACHE_EN;
1746 	else
1747 		ata_c.r_features = WDSF_WRITE_CACHE_DS;
1748 	if (wd->atabus->ata_exec_command(wd->drvp, &ata_c) != ATACMD_COMPLETE) {
1749 		printf("%s: wd_setcache command not complete\n",
1750 		    wd->sc_dev.dv_xname);
1751 		return EIO;
1752 	}
1753 	if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1754 		char sbuf[sizeof(at_errbits) + 64];
1755 		bitmask_snprintf(ata_c.flags, at_errbits, sbuf, sizeof(sbuf));
1756 		printf("%s: wd_setcache: status=%s\n", wd->sc_dev.dv_xname,
1757 		    sbuf);
1758 		return EIO;
1759 	}
1760 	return 0;
1761 }
1762 
1763 int
1764 wd_standby(struct wd_softc *wd, int flags)
1765 {
1766 	struct ata_command ata_c;
1767 
1768 	memset(&ata_c, 0, sizeof(struct ata_command));
1769 	ata_c.r_command = WDCC_STANDBY_IMMED;
1770 	ata_c.r_st_bmask = WDCS_DRDY;
1771 	ata_c.r_st_pmask = WDCS_DRDY;
1772 	ata_c.flags = flags;
1773 	ata_c.timeout = 30000; /* 30s timeout */
1774 	if (wd->atabus->ata_exec_command(wd->drvp, &ata_c) != ATACMD_COMPLETE) {
1775 		printf("%s: standby immediate command didn't complete\n",
1776 		    wd->sc_dev.dv_xname);
1777 		return EIO;
1778 	}
1779 	if (ata_c.flags & AT_ERROR) {
1780 		if (ata_c.r_error == WDCE_ABRT) /* command not supported */
1781 			return ENODEV;
1782 	}
1783 	if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1784 		char sbuf[sizeof(at_errbits) + 64];
1785 		bitmask_snprintf(ata_c.flags, at_errbits, sbuf, sizeof(sbuf));
1786 		printf("%s: wd_standby: status=%s\n", wd->sc_dev.dv_xname,
1787 		    sbuf);
1788 		return EIO;
1789 	}
1790 	return 0;
1791 }
1792 
1793 int
1794 wd_flushcache(struct wd_softc *wd, int flags)
1795 {
1796 	struct ata_command ata_c;
1797 
1798 	/*
1799 	 * WDCC_FLUSHCACHE is here since ATA-4, but some drives report
1800 	 * only ATA-2 and still support it.
1801 	 */
1802 	if (wd->drvp->ata_vers < 4 &&
1803 	    ((wd->sc_params.atap_cmd_set2 & WDC_CMD2_FC) == 0 ||
1804 	    wd->sc_params.atap_cmd_set2 == 0xffff))
1805 		return ENODEV;
1806 	memset(&ata_c, 0, sizeof(struct ata_command));
1807 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0 &&
1808 	    (wd->sc_params.atap_cmd2_en & ATA_CMD2_FCE) != 0)
1809 		ata_c.r_command = WDCC_FLUSHCACHE_EXT;
1810 	else
1811 		ata_c.r_command = WDCC_FLUSHCACHE;
1812 	ata_c.r_st_bmask = WDCS_DRDY;
1813 	ata_c.r_st_pmask = WDCS_DRDY;
1814 	ata_c.flags = flags;
1815 	ata_c.timeout = 30000; /* 30s timeout */
1816 	if (wd->atabus->ata_exec_command(wd->drvp, &ata_c) != ATACMD_COMPLETE) {
1817 		printf("%s: flush cache command didn't complete\n",
1818 		    wd->sc_dev.dv_xname);
1819 		return EIO;
1820 	}
1821 	if (ata_c.flags & AT_ERROR) {
1822 		if (ata_c.r_error == WDCE_ABRT) /* command not supported */
1823 			return ENODEV;
1824 	}
1825 	if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1826 		char sbuf[sizeof(at_errbits) + 64];
1827 		bitmask_snprintf(ata_c.flags, at_errbits, sbuf, sizeof(sbuf));
1828 		printf("%s: wd_flushcache: status=%s\n", wd->sc_dev.dv_xname,
1829 		    sbuf);
1830 		return EIO;
1831 	}
1832 	return 0;
1833 }
1834 
1835 void
1836 wd_shutdown(void *arg)
1837 {
1838 	struct wd_softc *wd = arg;
1839 	wd_flushcache(wd, AT_POLL);
1840 }
1841 
1842 /*
1843  * Allocate space for a ioctl queue structure.  Mostly taken from
1844  * scsipi_ioctl.c
1845  */
1846 struct wd_ioctl *
1847 wi_get(void)
1848 {
1849 	struct wd_ioctl *wi;
1850 	int s;
1851 
1852 	wi = malloc(sizeof(struct wd_ioctl), M_TEMP, M_WAITOK|M_ZERO);
1853 	simple_lock_init(&wi->wi_bp.b_interlock);
1854 	s = splbio();
1855 	LIST_INSERT_HEAD(&wi_head, wi, wi_list);
1856 	splx(s);
1857 	return (wi);
1858 }
1859 
1860 /*
1861  * Free an ioctl structure and remove it from our list
1862  */
1863 
1864 void
1865 wi_free(struct wd_ioctl *wi)
1866 {
1867 	int s;
1868 
1869 	s = splbio();
1870 	LIST_REMOVE(wi, wi_list);
1871 	splx(s);
1872 	free(wi, M_TEMP);
1873 }
1874 
1875 /*
1876  * Find a wd_ioctl structure based on the struct buf.
1877  */
1878 
1879 struct wd_ioctl *
1880 wi_find(struct buf *bp)
1881 {
1882 	struct wd_ioctl *wi;
1883 	int s;
1884 
1885 	s = splbio();
1886 	for (wi = wi_head.lh_first; wi != 0; wi = wi->wi_list.le_next)
1887 		if (bp == &wi->wi_bp)
1888 			break;
1889 	splx(s);
1890 	return (wi);
1891 }
1892 
1893 /*
1894  * Ioctl pseudo strategy routine
1895  *
1896  * This is mostly stolen from scsipi_ioctl.c:scsistrategy().  What
1897  * happens here is:
1898  *
1899  * - wdioctl() queues a wd_ioctl structure.
1900  *
1901  * - wdioctl() calls physio/wdioctlstrategy based on whether or not
1902  *   user space I/O is required.  If physio() is called, physio() eventually
1903  *   calls wdioctlstrategy().
1904  *
1905  * - In either case, wdioctlstrategy() calls wd->atabus->ata_exec_command()
1906  *   to perform the actual command
1907  *
1908  * The reason for the use of the pseudo strategy routine is because
1909  * when doing I/O to/from user space, physio _really_ wants to be in
1910  * the loop.  We could put the entire buffer into the ioctl request
1911  * structure, but that won't scale if we want to do things like download
1912  * microcode.
1913  */
1914 
1915 void
1916 wdioctlstrategy(struct buf *bp)
1917 {
1918 	struct wd_ioctl *wi;
1919 	struct ata_command ata_c;
1920 	int error = 0;
1921 
1922 	wi = wi_find(bp);
1923 	if (wi == NULL) {
1924 		printf("wdioctlstrategy: "
1925 		    "No matching ioctl request found in queue\n");
1926 		error = EINVAL;
1927 		goto bad;
1928 	}
1929 
1930 	memset(&ata_c, 0, sizeof(ata_c));
1931 
1932 	/*
1933 	 * Abort if physio broke up the transfer
1934 	 */
1935 
1936 	if (bp->b_bcount != wi->wi_atareq.datalen) {
1937 		printf("physio split wd ioctl request... cannot proceed\n");
1938 		error = EIO;
1939 		goto bad;
1940 	}
1941 
1942 	/*
1943 	 * Abort if we didn't get a buffer size that was a multiple of
1944 	 * our sector size (or was larger than NBBY)
1945 	 */
1946 
1947 	if ((bp->b_bcount % wi->wi_softc->sc_dk.dk_label->d_secsize) != 0 ||
1948 	    (bp->b_bcount / wi->wi_softc->sc_dk.dk_label->d_secsize) >=
1949 	     (1 << NBBY)) {
1950 		error = EINVAL;
1951 		goto bad;
1952 	}
1953 
1954 	/*
1955 	 * Make sure a timeout was supplied in the ioctl request
1956 	 */
1957 
1958 	if (wi->wi_atareq.timeout == 0) {
1959 		error = EINVAL;
1960 		goto bad;
1961 	}
1962 
1963 	if (wi->wi_atareq.flags & ATACMD_READ)
1964 		ata_c.flags |= AT_READ;
1965 	else if (wi->wi_atareq.flags & ATACMD_WRITE)
1966 		ata_c.flags |= AT_WRITE;
1967 
1968 	if (wi->wi_atareq.flags & ATACMD_READREG)
1969 		ata_c.flags |= AT_READREG;
1970 
1971 	ata_c.flags |= AT_WAIT;
1972 
1973 	ata_c.timeout = wi->wi_atareq.timeout;
1974 	ata_c.r_command = wi->wi_atareq.command;
1975 	ata_c.r_head = wi->wi_atareq.head & 0x0f;
1976 	ata_c.r_cyl = wi->wi_atareq.cylinder;
1977 	ata_c.r_sector = wi->wi_atareq.sec_num;
1978 	ata_c.r_count = wi->wi_atareq.sec_count;
1979 	ata_c.r_features = wi->wi_atareq.features;
1980 	ata_c.r_st_bmask = WDCS_DRDY;
1981 	ata_c.r_st_pmask = WDCS_DRDY;
1982 	ata_c.data = wi->wi_bp.b_data;
1983 	ata_c.bcount = wi->wi_bp.b_bcount;
1984 
1985 	if (wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, &ata_c)
1986 	    != ATACMD_COMPLETE) {
1987 		wi->wi_atareq.retsts = ATACMD_ERROR;
1988 		goto bad;
1989 	}
1990 
1991 	if (ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1992 		if (ata_c.flags & AT_ERROR) {
1993 			wi->wi_atareq.retsts = ATACMD_ERROR;
1994 			wi->wi_atareq.error = ata_c.r_error;
1995 		} else if (ata_c.flags & AT_DF)
1996 			wi->wi_atareq.retsts = ATACMD_DF;
1997 		else
1998 			wi->wi_atareq.retsts = ATACMD_TIMEOUT;
1999 	} else {
2000 		wi->wi_atareq.retsts = ATACMD_OK;
2001 		if (wi->wi_atareq.flags & ATACMD_READREG) {
2002 			wi->wi_atareq.head = ata_c.r_head ;
2003 			wi->wi_atareq.cylinder = ata_c.r_cyl;
2004 			wi->wi_atareq.sec_num = ata_c.r_sector;
2005 			wi->wi_atareq.sec_count = ata_c.r_count;
2006 			wi->wi_atareq.features = ata_c.r_features;
2007 			wi->wi_atareq.error = ata_c.r_error;
2008 		}
2009 	}
2010 
2011 	bp->b_error = 0;
2012 	biodone(bp);
2013 	return;
2014 bad:
2015 	bp->b_flags |= B_ERROR;
2016 	bp->b_error = error;
2017 	biodone(bp);
2018 }
2019