xref: /netbsd-src/sys/dev/ata/wd.c (revision bf3cd1d6de8a2e9e36970f3fb7fddc0b60797714)
1 /*	$NetBSD: wd.c,v 1.470 2024/09/22 17:31:43 uwe Exp $ */
2 
3 /*
4  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *	notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *	notice, this list of conditions and the following disclaimer in the
13  *	documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*-
28  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
29  * All rights reserved.
30  *
31  * This code is derived from software contributed to The NetBSD Foundation
32  * by Charles M. Hannum and by Onno van der Linden.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
44  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
45  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
46  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
47  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
48  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
49  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
50  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
51  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
52  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
53  * POSSIBILITY OF SUCH DAMAGE.
54  */
55 
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.470 2024/09/22 17:31:43 uwe Exp $");
58 
59 #include "opt_ata.h"
60 #include "opt_wd.h"
61 
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/kernel.h>
65 #include <sys/conf.h>
66 #include <sys/file.h>
67 #include <sys/stat.h>
68 #include <sys/ioctl.h>
69 #include <sys/buf.h>
70 #include <sys/bufq.h>
71 #include <sys/uio.h>
72 #include <sys/device.h>
73 #include <sys/disklabel.h>
74 #include <sys/disk.h>
75 #include <sys/syslog.h>
76 #include <sys/proc.h>
77 #include <sys/reboot.h>
78 #include <sys/vnode.h>
79 #include <sys/rndsource.h>
80 
81 #include <sys/intr.h>
82 #include <sys/bus.h>
83 
84 #include <dev/ata/atareg.h>
85 #include <dev/ata/atavar.h>
86 #include <dev/ata/wdvar.h>
87 #include <dev/ic/wdcreg.h>
88 #include <sys/ataio.h>
89 #include "locators.h"
90 
91 #include <prop/proplib.h>
92 
93 #define	WDIORETRIES_SINGLE 4	/* number of retries for single-sector */
94 #define	WDIORETRIES	5	/* number of retries before giving up */
95 #define	RECOVERYTIME hz/2	/* time to wait before retrying a cmd */
96 
97 #define	WDUNIT(dev)		DISKUNIT(dev)
98 #define	WDPART(dev)		DISKPART(dev)
99 #define	WDMINOR(unit, part)	DISKMINOR(unit, part)
100 #define	MAKEWDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)
101 
102 #define	WDLABELDEV(dev)	(MAKEWDDEV(major(dev), WDUNIT(dev), RAW_PART))
103 
104 #define DEBUG_FUNCS  0x08
105 #define DEBUG_PROBE  0x10
106 #define DEBUG_DETACH 0x20
107 #define DEBUG_XFERS  0x40
108 #ifdef ATADEBUG
109 #ifndef ATADEBUG_WD_MASK
110 #define ATADEBUG_WD_MASK 0x0
111 #endif
112 int wdcdebug_wd_mask = ATADEBUG_WD_MASK;
113 #define ATADEBUG_PRINT(args, level) \
114 	if (wdcdebug_wd_mask & (level)) \
115 		printf args
116 #else
117 #define ATADEBUG_PRINT(args, level)
118 #endif
119 
120 static int	wdprobe(device_t, cfdata_t, void *);
121 static void	wdattach(device_t, device_t, void *);
122 static int	wddetach(device_t, int);
123 static void	wdperror(const struct wd_softc *, struct ata_xfer *);
124 
125 static void	wdminphys(struct buf *);
126 
127 static int	wd_firstopen(device_t, dev_t, int, int);
128 static int	wd_lastclose(device_t);
129 static bool	wd_suspend(device_t, const pmf_qual_t *);
130 static int	wd_standby(struct wd_softc *, int);
131 
132 CFATTACH_DECL3_NEW(wd, sizeof(struct wd_softc),
133     wdprobe, wdattach, wddetach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
134 
135 extern struct cfdriver wd_cd;
136 
137 static dev_type_open(wdopen);
138 static dev_type_close(wdclose);
139 static dev_type_read(wdread);
140 static dev_type_write(wdwrite);
141 static dev_type_ioctl(wdioctl);
142 static dev_type_strategy(wdstrategy);
143 static dev_type_dump(wddump);
144 static dev_type_size(wdsize);
145 static dev_type_discard(wddiscard);
146 
/* Block device switch: entry points reached via /dev/wd* block nodes. */
const struct bdevsw wd_bdevsw = {
	.d_open = wdopen,
	.d_close = wdclose,
	.d_strategy = wdstrategy,
	.d_ioctl = wdioctl,
	.d_dump = wddump,
	.d_psize = wdsize,
	.d_discard = wddiscard,
	.d_cfdriver = &wd_cd,
	.d_devtounit = disklabel_dev_unit,
	.d_flag = D_DISK
};
159 
/* Character (raw) device switch: entry points for /dev/rwd* nodes. */
const struct cdevsw wd_cdevsw = {
	.d_open = wdopen,
	.d_close = wdclose,
	.d_read = wdread,
	.d_write = wdwrite,
	.d_ioctl = wdioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = wddiscard,
	.d_cfdriver = &wd_cd,
	.d_devtounit = disklabel_dev_unit,
	.d_flag = D_DISK
};
176 
177 /* #define WD_DUMP_NOT_TRUSTED if you just want to watch */
178 static int wddoingadump = 0;
179 static int wddumprecalibrated = 0;
180 
181 /*
182  * Glue necessary to hook WDCIOCCOMMAND into physio
183  */
184 
/* Per-request state carried through physio for WDCIOCCOMMAND. */
struct wd_ioctl {
	LIST_ENTRY(wd_ioctl) wi_list;	/* linkage on the ioctl list */
	struct buf wi_bp;		/* buf handed to physio */
	struct uio wi_uio;		/* describes the user data buffer */
	struct iovec wi_iov;		/* single iovec backing wi_uio */
	atareq_t wi_atareq;		/* caller's ATA request */
	struct wd_softc *wi_softc;	/* back-pointer to owning softc */
};
193 
194 static struct	wd_ioctl *wi_find(struct buf *);
195 static void	wi_free(struct wd_ioctl *);
196 static struct	wd_ioctl *wi_get(struct wd_softc *);
197 static void	wdioctlstrategy(struct buf *);
198 
199 static void	wdrestart(void *);
200 static void	wdstart1(struct wd_softc *, struct buf *, struct ata_xfer *);
201 static int	wd_diskstart(device_t, struct buf *);
202 static int	wd_dumpblocks(device_t, void *, daddr_t, int);
203 static void	wd_iosize(device_t, int *);
204 static int	wd_discard(device_t, off_t, off_t);
205 static void	wdbioretry(void *);
206 static void	wdbiorequeue(void *);
207 static void	wddone(device_t, struct ata_xfer *);
208 static int	wd_get_params(struct wd_softc *, struct ataparams *);
209 static void	wd_set_geometry(struct wd_softc *);
210 static int	wd_flushcache(struct wd_softc *, int);
211 static int	wd_trim(struct wd_softc *, daddr_t, long);
212 static bool	wd_shutdown(device_t, int);
213 
214 static int wd_getcache(struct wd_softc *, int *);
215 static int wd_setcache(struct wd_softc *, int);
216 
217 static void wd_sysctl_attach(struct wd_softc *);
218 static void wd_sysctl_detach(struct wd_softc *);
219 
/* Callbacks used by the dk(9) common disk framework for wd disks. */
static const struct dkdriver wddkdriver = {
	.d_open = wdopen,
	.d_close = wdclose,
	.d_strategy = wdstrategy,
	.d_minphys = wdminphys,
	.d_diskstart = wd_diskstart,
	.d_dumpblocks = wd_dumpblocks,
	.d_iosize = wd_iosize,
	.d_firstopen = wd_firstopen,
	.d_lastclose = wd_lastclose,
	.d_discard = wd_discard
};
232 
233 #ifdef HAS_BAD144_HANDLING
234 static void bad144intern(struct wd_softc *);
235 #endif
236 
237 #define	WD_QUIRK_SPLIT_MOD15_WRITE	0x0001	/* must split certain writes */
238 
239 #define	WD_QUIRK_FMT "\20\1SPLIT_MOD15_WRITE"
240 
241 /*
242  * Quirk table for IDE drives.  Put more-specific matches first, since
243  * a simple globing routine is used for matching.
244  */
static const struct wd_quirk {
	const char *wdq_match;		/* inquiry pattern to match */
	int wdq_quirks;			/* drive quirks */
} wd_quirk_table[] = {
	/*
	 * Some Seagate S-ATA drives have a PHY which can get confused
	 * with the way data is packetized by some S-ATA controllers.
	 *
	 * The work-around is to split in two any write transfer whose
	 * sector count % 15 == 1 (assuming 512 byte sectors).
	 *
	 * XXX This is an incomplete list.  There are at least a couple
	 * XXX more model numbers.  If you have trouble with such transfers
	 * XXX (8K is the most common) on Seagate S-ATA drives, please
	 * XXX notify thorpej@NetBSD.org.
	 *
	 * The ST360015AS has not yet been confirmed to have this
	 * issue, however, it is the only other drive in the
	 * Seagate Barracuda Serial ATA V family.
	 *
	 */
	{ "ST3120023AS", WD_QUIRK_SPLIT_MOD15_WRITE },
	{ "ST380023AS", WD_QUIRK_SPLIT_MOD15_WRITE },
	{ "ST360015AS", WD_QUIRK_SPLIT_MOD15_WRITE },
	/* List terminator; must remain last. */
	{ NULL,
	  0 }
};
272 
273 static const struct wd_quirk *
274 wd_lookup_quirks(const char *name)
275 {
276 	const struct wd_quirk *wdq;
277 	const char *estr;
278 
279 	for (wdq = wd_quirk_table; wdq->wdq_match != NULL; wdq++) {
280 		/*
281 		 * We only want exact matches (which include matches
282 		 * against globbing characters).
283 		 */
284 		if (pmatch(name, wdq->wdq_match, &estr) == 2)
285 			return (wdq);
286 	}
287 	return (NULL);
288 }
289 
290 static int
291 wdprobe(device_t parent, cfdata_t match, void *aux)
292 {
293 	struct ata_device *adev = aux;
294 
295 	if (adev == NULL)
296 		return 0;
297 	if (adev->adev_bustype->bustype_type != SCSIPI_BUSTYPE_ATA)
298 		return 0;
299 
300 	if (match->cf_loc[ATA_HLCF_DRIVE] != ATA_HLCF_DRIVE_DEFAULT &&
301 	    match->cf_loc[ATA_HLCF_DRIVE] != adev->adev_drv_data->drive)
302 		return 0;
303 	return 1;
304 }
305 
/*
 * Attach routine: issue IDENTIFY, decode the returned parameters
 * (model string, quirks, multi-sector PIO count, addressing mode,
 * capacity, logical/physical sector geometry) and then attach the
 * dk(9)/disk(9) machinery, wedges, power handler and sysctl nodes.
 */
static void
wdattach(device_t parent, device_t self, void *aux)
{
	struct wd_softc *wd = device_private(self);
	struct dk_softc *dksc = &wd->sc_dksc;
	struct ata_device *adev= aux;
	int i, blank;
	char tbuf[41],pbuf[9], c, *p, *q;
	const struct wd_quirk *wdq;
	int dtype = DKTYPE_UNKNOWN;

	dksc->sc_dev = self;

	ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE);
	mutex_init(&wd->sc_lock, MUTEX_DEFAULT, IPL_BIO);
#ifdef WD_SOFTBADSECT
	SLIST_INIT(&wd->sc_bslist);
	cv_init(&wd->sc_bslist_cv, "wdbadsect");
#endif
	wd->atabus = adev->adev_bustype;
	wd->inflight = 0;
	wd->drvp = adev->adev_drv_data;

	wd->drvp->drv_openings = 1;
	wd->drvp->drv_done = wddone;
	wd->drvp->drv_softc = dksc->sc_dev; /* done in atabusconfig_thread()
					     but too late */

	/* Retry/requeue machinery used by wddone() on transfer errors. */
	SLIST_INIT(&wd->sc_retry_list);
	SLIST_INIT(&wd->sc_requeue_list);
	callout_init(&wd->sc_retry_callout, 0);		/* XXX MPSAFE */
	callout_init(&wd->sc_requeue_callout, 0);	/* XXX MPSAFE */
	callout_init(&wd->sc_restart_diskqueue, 0);	/* XXX MPSAFE */

	aprint_naive("\n");
	aprint_normal("\n");

	/* read our drive info */
	if (wd_get_params(wd, &wd->sc_params) != 0) {
		aprint_error_dev(self, "IDENTIFY failed\n");
		goto out;
	}

	/*
	 * Copy the model name out of the IDENTIFY data into tbuf,
	 * collapsing runs of blanks into one space and stopping at NUL.
	 */
	for (blank = 0, p = wd->sc_params.atap_model, q = tbuf, i = 0;
	    i < sizeof(wd->sc_params.atap_model); i++) {
		c = *p++;
		if (c == '\0')
			break;
		if (c != ' ') {
			if (blank) {
				*q++ = ' ';
				blank = 0;
			}
			*q++ = c;
		} else
			blank = 1;
	}
	*q++ = '\0';

	wd->sc_typename = kmem_asprintf("%s", tbuf);
	aprint_normal_dev(self, "<%s>\n", wd->sc_typename);

	/* Apply model-specific quirks, if the model is in the table. */
	wdq = wd_lookup_quirks(tbuf);
	if (wdq != NULL)
		wd->sc_quirks = wdq->wdq_quirks;

	if (wd->sc_quirks != 0) {
		char sbuf[sizeof(WD_QUIRK_FMT) + 64];
		snprintb(sbuf, sizeof(sbuf), WD_QUIRK_FMT, wd->sc_quirks);
		aprint_normal_dev(self, "quirks %s\n", sbuf);

		if (wd->sc_quirks & WD_QUIRK_SPLIT_MOD15_WRITE) {
			aprint_error_dev(self, "drive corrupts write transfers with certain controllers, consider replacing\n");
		}
	}

	/* Multi-sector PIO count from IDENTIFY; fall back to 1 sector. */
	if ((wd->sc_params.atap_multi & 0xff) > 1) {
		wd->drvp->multi = wd->sc_params.atap_multi & 0xff;
	} else {
		wd->drvp->multi = 1;
	}

	aprint_verbose_dev(self, "drive supports %d-sector PIO transfers,",
	    wd->drvp->multi);

	/* 48-bit LBA addressing */
	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0)
		wd->sc_flags |= WDF_LBA48;

	/* Prior to ATA-4, LBA was optional. */
	if ((wd->sc_params.atap_capabilities1 & WDC_CAP_LBA) != 0)
		wd->sc_flags |= WDF_LBA;
#if 0
	/* ATA-4 requires LBA. */
	if (wd->sc_params.atap_ataversion != 0xffff &&
	    wd->sc_params.atap_ataversion >= WDC_VER_ATA4)
		wd->sc_flags |= WDF_LBA;
#endif

	/* Derive capacity according to the addressing mode in use. */
	if ((wd->sc_flags & WDF_LBA48) != 0) {
		aprint_verbose(" LBA48 addressing\n");
		wd->sc_capacity =
		    ((uint64_t) wd->sc_params.atap_max_lba[3] << 48) |
		    ((uint64_t) wd->sc_params.atap_max_lba[2] << 32) |
		    ((uint64_t) wd->sc_params.atap_max_lba[1] << 16) |
		    ((uint64_t) wd->sc_params.atap_max_lba[0] <<  0);
		wd->sc_capacity28 =
		    (wd->sc_params.atap_capacity[1] << 16) |
		    wd->sc_params.atap_capacity[0];
		/*
		 * Force LBA48 addressing for invalid numbers.
		 */
		if (wd->sc_capacity28 > 0xfffffff)
			wd->sc_capacity28 = 0xfffffff;
	} else if ((wd->sc_flags & WDF_LBA) != 0) {
		aprint_verbose(" LBA addressing\n");
		wd->sc_capacity28 =
		    (wd->sc_params.atap_capacity[1] << 16) |
		    wd->sc_params.atap_capacity[0];
		/*
		 * Limit capacity to LBA28 numbers to avoid overflow.
		 */
		if (wd->sc_capacity28 > 0xfffffff)
			wd->sc_capacity28 = 0xfffffff;
		wd->sc_capacity = wd->sc_capacity28;
	} else {
		aprint_verbose(" chs addressing\n");
		wd->sc_capacity =
		    wd->sc_params.atap_cylinders *
		    wd->sc_params.atap_heads *
		    wd->sc_params.atap_sectors;
		/*
		 * LBA28 size is ignored for CHS addressing. Use a reasonable
		 * value for debugging. The CHS values may be artificial and
		 * are mostly ignored.
		 */
		if (wd->sc_capacity < 0xfffffff)
			wd->sc_capacity28 = wd->sc_capacity;
		else
			wd->sc_capacity28 = 0xfffffff;
	}
	/* Logical sector size: from IDENTIFY words when valid, else 512. */
	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LLS) != 0)) {
		wd->sc_blksize = 2ULL *
		    ((uint32_t)((wd->sc_params.atap_lls_secsz[1] << 16) |
		    wd->sc_params.atap_lls_secsz[0]));
	} else {
		wd->sc_blksize = 512;
	}
	/* Physical-sector alignment (logical sectors per physical). */
	wd->sc_sectoralign.dsa_firstaligned = 0;
	wd->sc_sectoralign.dsa_alignment = 1;
	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LPS) != 0)) {
		wd->sc_sectoralign.dsa_alignment = 1 <<
		    (wd->sc_params.atap_secsz & ATA_SECSZ_LPS_SZMSK);
		if ((wd->sc_params.atap_logical_align & ATA_LA_VALID_MASK) ==
		    ATA_LA_VALID) {
			wd->sc_sectoralign.dsa_firstaligned =
			    (wd->sc_sectoralign.dsa_alignment -
				(wd->sc_params.atap_logical_align &
				    ATA_LA_MASK));
		}
	}
	wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
	aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
	    "%d bytes/sect x %llu sectors",
	    pbuf,
	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
		wd->sc_params.atap_cylinders,
	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
	    wd->sc_blksize, (unsigned long long)wd->sc_capacity);
	if (wd->sc_sectoralign.dsa_alignment != 1) {
		aprint_normal(" (%d bytes/physsect",
		    wd->sc_sectoralign.dsa_alignment * wd->sc_blksize);
		if (wd->sc_sectoralign.dsa_firstaligned != 0) {
			aprint_normal("; first aligned sector: %jd",
			    (intmax_t)wd->sc_sectoralign.dsa_firstaligned);
		}
		aprint_normal(")");
	}
	aprint_normal("\n");

	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
	    device_xname(self), wd->sc_params.atap_dmatiming_mimi,
	    wd->sc_params.atap_dmatiming_recom), DEBUG_PROBE);

	/* Sanity-check the sector size; odd values may break I/O paths. */
	if (wd->sc_blksize <= 0 || !powerof2(wd->sc_blksize) ||
	    wd->sc_blksize < DEV_BSIZE || wd->sc_blksize > MAXPHYS) {
		aprint_normal_dev(self, "WARNING: block size %u "
		    "might not actually work\n", wd->sc_blksize);
	}

	if (strcmp(wd->sc_params.atap_model, "ST506") == 0)
		dtype = DKTYPE_ST506;
	else
		dtype = DKTYPE_ESDI;

out:
	/*
	 * Initialize and attach the disk structure.
	 */
	dk_init(dksc, self, dtype);
	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &wddkdriver);

	/* Attach dk and disk subsystems */
	dk_attach(dksc);
	disk_attach(&dksc->sc_dkdev);
	wd_set_geometry(wd);

	bufq_alloc(&dksc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);

	/* reference to label structure, used by ata code */
	wd->drvp->lp = dksc->sc_dkdev.dk_label;

	/* Discover wedges on this disk. */
	dkwedge_discover(&dksc->sc_dkdev);

	if (!pmf_device_register1(self, wd_suspend, NULL, wd_shutdown))
		aprint_error_dev(self, "couldn't establish power handler\n");

	wd_sysctl_attach(wd);
}
530 
531 static bool
532 wd_suspend(device_t dv, const pmf_qual_t *qual)
533 {
534 	struct wd_softc *sc = device_private(dv);
535 
536 	/* the adapter needs to be enabled */
537 	if (sc->atabus->ata_addref(sc->drvp))
538 		return true; /* no need to complain */
539 
540 	wd_flushcache(sc, AT_WAIT);
541 	wd_standby(sc, AT_WAIT);
542 
543 	sc->atabus->ata_delref(sc->drvp);
544 	return true;
545 }
546 
/*
 * Detach routine: tear down everything wdattach() created, in the
 * reverse of the required order (vnodes, queued I/O, callouts, bufq,
 * wedges, disk/dk structures, bad-sector list, power handler, sysctl,
 * lock).  Fails early, leaving the device attached, if still open.
 */
static int
wddetach(device_t self, int flags)
{
	struct wd_softc *wd = device_private(self);
	struct dk_softc *dksc = &wd->sc_dksc;
	int bmaj, cmaj, i, mn, rc;

	/* Refuse detach while open, unless detachment is forced. */
	if ((rc = disk_begindetach(&dksc->sc_dkdev, wd_lastclose, self, flags)) != 0)
		return rc;

	/* locate the major number */
	bmaj = bdevsw_lookup_major(&wd_bdevsw);
	cmaj = cdevsw_lookup_major(&wd_cdevsw);

	/* Nuke the vnodes for any open instances. */
	for (i = 0; i < MAXPARTITIONS; i++) {
		mn = WDMINOR(device_unit(self), i);
		vdevgone(bmaj, mn, mn, VBLK);
		vdevgone(cmaj, mn, mn, VCHR);
	}

	/* Wait for queued transfers to finish before tearing down. */
	dk_drain(dksc);

	/* Kill off any pending commands. */
	mutex_enter(&wd->sc_lock);
	wd->atabus->ata_killpending(wd->drvp);

	/* Halt the retry/requeue/restart callouts under sc_lock. */
	callout_halt(&wd->sc_retry_callout, &wd->sc_lock);
	callout_destroy(&wd->sc_retry_callout);
	callout_halt(&wd->sc_requeue_callout, &wd->sc_lock);
	callout_destroy(&wd->sc_requeue_callout);
	callout_halt(&wd->sc_restart_diskqueue, &wd->sc_lock);
	callout_destroy(&wd->sc_restart_diskqueue);

	mutex_exit(&wd->sc_lock);

	bufq_free(dksc->sc_bufq);

	/* Delete all of our wedges. */
	dkwedge_delall(&dksc->sc_dkdev);

	/* Spin the drive down when powering off. */
	if (flags & DETACH_POWEROFF)
		wd_standby(wd, AT_POLL);

	/* Detach from the disk list. */
	disk_detach(&dksc->sc_dkdev);
	disk_destroy(&dksc->sc_dkdev);

	dk_detach(dksc);

#ifdef WD_SOFTBADSECT
	/* Clean out the bad sector list */
	while (!SLIST_EMPTY(&wd->sc_bslist)) {
		struct disk_badsectors *dbs = SLIST_FIRST(&wd->sc_bslist);
		SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
		kmem_free(dbs, sizeof(*dbs));
	}
	wd->sc_bscount = 0;
#endif
	if (wd->sc_typename != NULL) {
		kmem_free(wd->sc_typename, strlen(wd->sc_typename) + 1);
		wd->sc_typename = NULL;
	}

	pmf_device_deregister(self);

	wd_sysctl_detach(wd);

#ifdef WD_SOFTBADSECT
	KASSERT(SLIST_EMPTY(&wd->sc_bslist));
	cv_destroy(&wd->sc_bslist_cv);
#endif

	mutex_destroy(&wd->sc_lock);

	wd->drvp->drive_type = ATA_DRIVET_NONE; /* no drive any more here */
	wd->drvp->drive_flags = 0;

	return (0);
}
627 
628 /*
629  * Read/write routine for a buffer.  Validates the arguments and schedules the
630  * transfer.  Does not wait for the transfer to complete.
631  */
static void
wdstrategy(struct buf *bp)
{
	struct wd_softc *wd =
	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
	struct dk_softc *dksc = &wd->sc_dksc;

	ATADEBUG_PRINT(("wdstrategy (%s)\n", dksc->sc_xname),
	    DEBUG_XFERS);

	/* If device invalidated (e.g. media change, door open,
	 * device detachment), then error.
	 */
	if ((wd->sc_flags & WDF_LOADED) == 0 ||
	    !device_is_enabled(dksc->sc_dev))
		goto err;

#ifdef WD_SOFTBADSECT
	/*
	 * If the transfer about to be attempted contains only a block that
	 * is known to be bad then return an error for the transfer without
	 * even attempting to start a transfer up under the premis that we
	 * will just end up doing more retries for a transfer that will end
	 * up failing again.
	 */
	if (__predict_false(!SLIST_EMPTY(&wd->sc_bslist))) {
		struct disklabel *lp = dksc->sc_dkdev.dk_label;
		struct disk_badsectors *dbs;
		daddr_t blkno, maxblk;

		/* convert the block number to absolute */
		if (lp->d_secsize >= DEV_BSIZE)
			blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
		else
			blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
		if (WDPART(bp->b_dev) != RAW_PART)
			blkno += lp->d_partitions[WDPART(bp->b_dev)].p_offset;
		maxblk = blkno + (bp->b_bcount / wd->sc_blksize) - 1;

		/* Reject the request if it overlaps any known-bad range. */
		mutex_enter(&wd->sc_lock);
		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next)
			if ((dbs->dbs_min <= bp->b_rawblkno &&
			     bp->b_rawblkno <= dbs->dbs_max) ||
			    (dbs->dbs_min <= maxblk && maxblk <= dbs->dbs_max)){
				mutex_exit(&wd->sc_lock);
				goto err;
			}
		mutex_exit(&wd->sc_lock);
	}
#endif

	/* Hand the buffer to the common disk framework for scheduling. */
	dk_strategy(dksc, bp);
	return;

err:
	bp->b_error = EIO;
	bp->b_resid = bp->b_bcount;
	biodone(bp);
}
691 
/*
 * Fill in the ata_xfer for a buffer and submit it to the ATA bus.
 * Called with wd->sc_lock held and returns with it held, but the lock
 * is dropped around the ata_bio() call, which may sleep or re-enter
 * the driver via the completion path.
 */
static void
wdstart1(struct wd_softc *wd, struct buf *bp, struct ata_xfer *xfer)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	const uint32_t secsize = dksc->sc_dkdev.dk_geom.dg_secsize;

	KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL);
	KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0);
	KASSERT(mutex_owned(&wd->sc_lock));

	/* Reset state, so that retries don't use stale info */
	if (__predict_false(xfer->c_retries > 0)) {
		xfer->c_flags = 0;
		memset(&xfer->c_bio, 0, sizeof(xfer->c_bio));
	}

	/* Describe the transfer to the ATA layer. */
	xfer->c_bio.blkno = bp->b_rawblkno;
	xfer->c_bio.bcount = bp->b_bcount;
	xfer->c_bio.databuf = bp->b_data;
	xfer->c_bio.blkdone = 0;
	xfer->c_bio.bp = bp;

	/* Adjust blkno and bcount if xfer has been already partially done */
	if (__predict_false(xfer->c_skip > 0)) {
		KASSERT(xfer->c_skip < xfer->c_bio.bcount);
		KASSERT((xfer->c_skip % secsize) == 0);
		xfer->c_bio.bcount -= xfer->c_skip;
		xfer->c_bio.blkno += xfer->c_skip / secsize;
	}

#ifdef WD_CHAOS_MONKEY
	/*
	 * Override blkno to be over device capacity to trigger error,
	 * but only if it's read, to avoid trashing disk contents should
	 * the command be clipped, or otherwise misinterpreted, by the
	 * driver or controller.
	 */
	if (BUF_ISREAD(bp) && xfer->c_retries == 0 && wd->drv_chaos_freq > 0 &&
	    (++wd->drv_chaos_cnt % wd->drv_chaos_freq) == 0) {
		device_printf(dksc->sc_dev, "%s: chaos xfer %"PRIxPTR"\n",
		    __func__, (intptr_t)xfer & PAGE_MASK);
		xfer->c_bio.blkno = 7777777 + wd->sc_capacity;
		xfer->c_flags |= C_CHAOS;
	}
#endif

	/*
	 * If we're retrying, retry in single-sector mode. This will give us
	 * the sector number of the problem, and will eventually allow the
	 * transfer to succeed. If FUA is requested, we can't actually
	 * do this, as ATA_SINGLE is usually executed as PIO transfer by drivers
	 * which support it, and that isn't compatible with NCQ/FUA.
	 */
	if (xfer->c_retries >= WDIORETRIES_SINGLE &&
	    (bp->b_flags & B_MEDIA_FUA) == 0)
		xfer->c_bio.flags = ATA_SINGLE;
	else
		xfer->c_bio.flags = 0;

	/*
	 * request LBA48 transfers when supported by the controller
	 * and needed by transfer offset or size.
	 */
	if (wd->sc_flags & WDF_LBA48 &&
	    (((xfer->c_bio.blkno + xfer->c_bio.bcount / secsize) >
	    wd->sc_capacity28) ||
	    ((xfer->c_bio.bcount / secsize) > 128)))
		xfer->c_bio.flags |= ATA_LBA48;

	/*
	 * If NCQ was negotiated, always use it for the first several attempts.
	 * Since device cancels all outstanding requests on error, downgrade
	 * to non-NCQ on retry, so that the retried transfer would not cause
	 * cascade failure for the other transfers if it fails again.
	 * If FUA was requested, we can't downgrade, as that would violate
	 * the semantics - FUA would not be honored. In that case, continue
	 * retrying with NCQ.
	 */
	if (WD_USE_NCQ(wd) && (xfer->c_retries < WDIORETRIES_SINGLE ||
	    (bp->b_flags & B_MEDIA_FUA) != 0)) {
		xfer->c_bio.flags |= ATA_LBA48;
		xfer->c_flags |= C_NCQ;

		if (WD_USE_NCQ_PRIO(wd) &&
		    BIO_GETPRIO(bp) == BPRIO_TIMECRITICAL)
			xfer->c_bio.flags |= ATA_PRIO_HIGH;
	}

	if (wd->sc_flags & WDF_LBA)
		xfer->c_bio.flags |= ATA_LBA;
	if (bp->b_flags & B_READ) {
		xfer->c_bio.flags |= ATA_READ;
	} else {
		/* it's a write */
		wd->sc_flags |= WDF_DIRTY;
	}
	if (bp->b_flags & B_MEDIA_FUA) {
		/* If not using NCQ, the command WRITE DMA FUA EXT is LBA48 */
		KASSERT((wd->sc_flags & WDF_LBA48) != 0);
		if ((xfer->c_flags & C_NCQ) == 0)
			xfer->c_bio.flags |= ATA_LBA48;

		xfer->c_bio.flags |= ATA_FUA;
	}

	/* Count only fresh transfers; retries were already counted. */
	if (xfer->c_retries == 0)
		wd->inflight++;
	mutex_exit(&wd->sc_lock);

	/* Queue the xfer */
	wd->atabus->ata_bio(wd->drvp, xfer);

	mutex_enter(&wd->sc_lock);
}
806 
807 static int
808 wd_diskstart(device_t dev, struct buf *bp)
809 {
810 	struct wd_softc *wd = device_private(dev);
811 #ifdef ATADEBUG
812 	struct dk_softc *dksc = &wd->sc_dksc;
813 #endif
814 	struct ata_xfer *xfer;
815 	struct ata_channel *chp;
816 	unsigned openings;
817 	int ticks;
818 
819 	mutex_enter(&wd->sc_lock);
820 
821 	chp = wd->drvp->chnl_softc;
822 
823 	ata_channel_lock(chp);
824 	openings = ata_queue_openings(chp);
825 	ata_channel_unlock(chp);
826 
827 	openings = uimin(openings, wd->drvp->drv_openings);
828 
829 	if (wd->inflight >= openings) {
830 		/*
831 		 * pretend we run out of memory when the queue is full,
832 		 * so that the operation is retried after a minimal
833 		 * delay.
834 		 */
835 		xfer = NULL;
836 		ticks = 1;
837 	} else {
838 		/*
839 		 * If there is no available memory, retry later. This
840 		 * happens very rarely and only under memory pressure,
841 		 * so wait relatively long before retry.
842 		 */
843 		xfer = ata_get_xfer(chp, false);
844 		ticks = hz/2;
845 	}
846 
847 	if (xfer == NULL) {
848 		ATADEBUG_PRINT(("wd_diskstart %s no xfer\n",
849 		    dksc->sc_xname), DEBUG_XFERS);
850 
851 		/*
852 		 * The disk queue is pushed automatically when an I/O
853 		 * operation finishes or another one is queued. We
854 		 * need this extra timeout because an ATA channel
855 		 * might be shared by more than one disk queue and
856 		 * all queues need to be restarted when another slot
857 		 * becomes available.
858 		 */
859 		if (!callout_pending(&wd->sc_restart_diskqueue)) {
860 			callout_reset(&wd->sc_restart_diskqueue, ticks,
861 			    wdrestart, dev);
862 		}
863 
864 		mutex_exit(&wd->sc_lock);
865 		return EAGAIN;
866 	}
867 
868 	wdstart1(wd, bp, xfer);
869 
870 	mutex_exit(&wd->sc_lock);
871 
872 	return 0;
873 }
874 
875 /*
876  * Queue a drive for I/O.
877  */
878 static void
879 wdrestart(void *x)
880 {
881 	device_t self = x;
882 	struct wd_softc *wd = device_private(self);
883 	struct dk_softc *dksc = &wd->sc_dksc;
884 
885 	ATADEBUG_PRINT(("wdstart %s\n", dksc->sc_xname),
886 	    DEBUG_XFERS);
887 
888 	if (!device_is_active(dksc->sc_dev))
889 		return;
890 
891 	dk_start(dksc, NULL);
892 }
893 
894 static void
895 wddone(device_t self, struct ata_xfer *xfer)
896 {
897 	struct wd_softc *wd = device_private(self);
898 	struct dk_softc *dksc = &wd->sc_dksc;
899 	const char *errmsg;
900 	int do_perror = 0;
901 	struct buf *bp;
902 
903 	ATADEBUG_PRINT(("wddone %s\n", dksc->sc_xname),
904 	    DEBUG_XFERS);
905 
906 	if (__predict_false(wddoingadump)) {
907 		/* just drop it to the floor */
908 		ata_free_xfer(wd->drvp->chnl_softc, xfer);
909 		return;
910 	}
911 
912 	bp = xfer->c_bio.bp;
913 	KASSERT(bp != NULL);
914 
915 	bp->b_resid = xfer->c_bio.bcount;
916 	switch (xfer->c_bio.error) {
917 	case ERR_DMA:
918 		errmsg = "DMA error";
919 		goto retry;
920 	case ERR_DF:
921 		errmsg = "device fault";
922 		goto retry;
923 	case TIMEOUT:
924 		errmsg = "device timeout";
925 		goto retry;
926 	case REQUEUE:
927 		errmsg = "requeue";
928 		goto retry2;
929 	case ERR_RESET:
930 		errmsg = "channel reset";
931 		goto retry2;
932 	case ERROR:
933 		/* Don't care about media change bits */
934 		if (xfer->c_bio.r_error != 0 &&
935 		    (xfer->c_bio.r_error & ~(WDCE_MC | WDCE_MCR)) == 0)
936 			goto noerror;
937 		errmsg = "error";
938 		do_perror = 1;
939 retry:		/* Just reset and retry. Can we do more ? */
940 		if ((xfer->c_flags & C_RECOVERED) == 0) {
941 			int wflags = (xfer->c_flags & C_POLL) ? AT_POLL : 0;
942 			ata_channel_lock(wd->drvp->chnl_softc);
943 			ata_thread_run(wd->drvp->chnl_softc, wflags,
944 			    ATACH_TH_DRIVE_RESET, wd->drvp->drive);
945 			ata_channel_unlock(wd->drvp->chnl_softc);
946 		}
947 retry2:
948 		mutex_enter(&wd->sc_lock);
949 
950 		diskerr(bp, "wd", errmsg, LOG_PRINTF,
951 		    xfer->c_bio.blkdone, dksc->sc_dkdev.dk_label);
952 		if (xfer->c_retries < WDIORETRIES)
953 			printf(", xfer %"PRIxPTR", retry %d",
954 			    (intptr_t)xfer & PAGE_MASK,
955 			    xfer->c_retries);
956 		printf("\n");
957 		if (do_perror)
958 			wdperror(wd, xfer);
959 
960 		if (xfer->c_retries < WDIORETRIES) {
961 			xfer->c_retries++;
962 
963 			/* Rerun ASAP if just requeued */
964 			if (xfer->c_bio.error == REQUEUE) {
965 				SLIST_INSERT_HEAD(&wd->sc_requeue_list, xfer,
966 				    c_retrychain);
967 				callout_reset(&wd->sc_requeue_callout,
968 				    1, wdbiorequeue, wd);
969 			} else {
970 				SLIST_INSERT_HEAD(&wd->sc_retry_list, xfer,
971 				    c_retrychain);
972 				callout_reset(&wd->sc_retry_callout,
973 				    RECOVERYTIME, wdbioretry, wd);
974 			}
975 
976 			mutex_exit(&wd->sc_lock);
977 			return;
978 		}
979 
980 		mutex_exit(&wd->sc_lock);
981 
982 #ifdef WD_SOFTBADSECT
983 		/*
984 		 * Not all errors indicate a failed block but those that do,
985 		 * put the block on the bad-block list for the device.  Only
986 		 * do this for reads because the drive should do it for writes,
987 		 * itself, according to Manuel.
988 		 */
989 		if ((bp->b_flags & B_READ) &&
990 		    ((wd->drvp->ata_vers >= 4 && xfer->c_bio.r_error & 64) ||
991 		     (wd->drvp->ata_vers < 4 && xfer->c_bio.r_error & 192))) {
992 			struct disk_badsectors *dbs;
993 
994 			dbs = kmem_zalloc(sizeof *dbs, KM_NOSLEEP);
995 			if (dbs == NULL) {
996 				aprint_error_dev(dksc->sc_dev,
997 				    "failed to add bad block to list\n");
998 				goto out;
999 			}
1000 
1001 			dbs->dbs_min = bp->b_rawblkno;
1002 			dbs->dbs_max = dbs->dbs_min +
1003 			    (bp->b_bcount /wd->sc_blksize) - 1;
1004 			microtime(&dbs->dbs_failedat);
1005 
1006 			mutex_enter(&wd->sc_lock);
1007 			SLIST_INSERT_HEAD(&wd->sc_bslist, dbs, dbs_next);
1008 			wd->sc_bscount++;
1009 			mutex_exit(&wd->sc_lock);
1010 		}
1011 out:
1012 #endif
1013 		bp->b_error = EIO;
1014 		break;
1015 	case NOERROR:
1016 #ifdef WD_CHAOS_MONKEY
1017 		/*
1018 		 * For example Parallels AHCI emulation doesn't actually
1019 		 * return error for the invalid I/O, so just re-run
1020 		 * the request and do not panic.
1021 		 */
1022 		if (__predict_false(xfer->c_flags & C_CHAOS)) {
1023 			xfer->c_bio.error = REQUEUE;
1024 			errmsg = "chaos noerror";
1025 			goto retry2;
1026 		}
1027 #endif
1028 
1029 noerror:	if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_retries > 0)
1030 			device_printf(dksc->sc_dev,
1031 			    "soft error (corrected) xfer %"PRIxPTR"\n",
1032 			    (intptr_t)xfer & PAGE_MASK);
1033 		break;
1034 	case ERR_NODEV:
1035 		bp->b_error = EIO;
1036 		break;
1037 	}
1038 	if (__predict_false(bp->b_error != 0) && bp->b_resid == 0) {
1039 		/*
1040 		 * the disk or controller sometimes report a complete
1041 		 * xfer, when there has been an error. This is wrong,
1042 		 * assume nothing got transferred in this case
1043 		 */
1044 		bp->b_resid = bp->b_bcount;
1045 	}
1046 
1047 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1048 
1049 	mutex_enter(&wd->sc_lock);
1050 	wd->inflight--;
1051 	mutex_exit(&wd->sc_lock);
1052 	dk_done(dksc, bp);
1053 	dk_start(dksc, NULL);
1054 }
1055 
1056 static void
1057 wdbioretry(void *v)
1058 {
1059 	struct wd_softc *wd = v;
1060 	struct ata_xfer *xfer;
1061 
1062 	ATADEBUG_PRINT(("%s %s\n", __func__, wd->sc_dksc.sc_xname),
1063 	    DEBUG_XFERS);
1064 
1065 	mutex_enter(&wd->sc_lock);
1066 	while ((xfer = SLIST_FIRST(&wd->sc_retry_list))) {
1067 		SLIST_REMOVE_HEAD(&wd->sc_retry_list, c_retrychain);
1068 		wdstart1(wd, xfer->c_bio.bp, xfer);
1069 	}
1070 	mutex_exit(&wd->sc_lock);
1071 }
1072 
1073 static void
1074 wdbiorequeue(void *v)
1075 {
1076 	struct wd_softc *wd = v;
1077 	struct ata_xfer *xfer;
1078 
1079 	ATADEBUG_PRINT(("%s %s\n", __func__, wd->sc_dksc.sc_xname),
1080 	    DEBUG_XFERS);
1081 
1082 	mutex_enter(&wd->sc_lock);
1083 	while ((xfer = SLIST_FIRST(&wd->sc_requeue_list))) {
1084 		SLIST_REMOVE_HEAD(&wd->sc_requeue_list, c_retrychain);
1085 		wdstart1(wd, xfer->c_bio.bp, xfer);
1086 	}
1087 	mutex_exit(&wd->sc_lock);
1088 }
1089 
1090 static void
1091 wdminphys(struct buf *bp)
1092 {
1093 	const struct wd_softc * const wd =
1094 	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
1095 	int maxsectors;
1096 
1097 	/*
1098 	 * The limit is actually 65536 for LBA48 and 256 for non-LBA48,
1099 	 * but that requires to set the count for the ATA command
1100 	 * to 0, which is somewhat error prone, so better stay safe.
1101 	 */
1102 	if (wd->sc_flags & WDF_LBA48)
1103 		maxsectors = 65535;
1104 	else
1105 		maxsectors = 128;
1106 
1107 	if (bp->b_bcount > (wd->sc_blksize * maxsectors))
1108 		bp->b_bcount = (wd->sc_blksize * maxsectors);
1109 
1110 	minphys(bp);
1111 }
1112 
1113 static void
1114 wd_iosize(device_t dev, int *count)
1115 {
1116 	struct buf B;
1117 	int bmaj;
1118 
1119 	bmaj       = bdevsw_lookup_major(&wd_bdevsw);
1120 	B.b_dev    = MAKEWDDEV(bmaj,device_unit(dev),RAW_PART);
1121 	B.b_bcount = *count;
1122 
1123 	wdminphys(&B);
1124 
1125 	*count = B.b_bcount;
1126 }
1127 
1128 static int
1129 wdread(dev_t dev, struct uio *uio, int flags)
1130 {
1131 
1132 	ATADEBUG_PRINT(("wdread\n"), DEBUG_XFERS);
1133 	return (physio(wdstrategy, NULL, dev, B_READ, wdminphys, uio));
1134 }
1135 
1136 static int
1137 wdwrite(dev_t dev, struct uio *uio, int flags)
1138 {
1139 
1140 	ATADEBUG_PRINT(("wdwrite\n"), DEBUG_XFERS);
1141 	return (physio(wdstrategy, NULL, dev, B_WRITE, wdminphys, uio));
1142 }
1143 
1144 static int
1145 wdopen(dev_t dev, int flag, int fmt, struct lwp *l)
1146 {
1147 	struct wd_softc *wd;
1148 	struct dk_softc *dksc;
1149 	int unit, part, error;
1150 
1151 	ATADEBUG_PRINT(("wdopen\n"), DEBUG_FUNCS);
1152 	unit = WDUNIT(dev);
1153 	wd = device_lookup_private(&wd_cd, unit);
1154 	if (wd == NULL)
1155 		return (ENXIO);
1156 	dksc = &wd->sc_dksc;
1157 
1158 	if (! device_is_active(dksc->sc_dev))
1159 		return (ENODEV);
1160 
1161 	part = WDPART(dev);
1162 
1163 	if (wd->sc_capacity == 0)
1164 		return (ENODEV);
1165 
1166 	/*
1167 	 * If any partition is open, but the disk has been invalidated,
1168 	 * disallow further opens.
1169 	 */
1170 	if ((wd->sc_flags & (WDF_OPEN | WDF_LOADED)) == WDF_OPEN) {
1171 		if (part != RAW_PART || fmt != S_IFCHR)
1172 			return EIO;
1173 	}
1174 
1175 	error = dk_open(dksc, dev, flag, fmt, l);
1176 
1177 	return error;
1178 }
1179 
1180 /*
1181  * Serialized by caller
1182  */
1183 static int
1184 wd_firstopen(device_t self, dev_t dev, int flag, int fmt)
1185 {
1186 	struct wd_softc *wd = device_private(self);
1187 	struct dk_softc *dksc = &wd->sc_dksc;
1188 	int error;
1189 
1190 	error = wd->atabus->ata_addref(wd->drvp);
1191 	if (error)
1192 		return error;
1193 
1194 	if ((wd->sc_flags & WDF_LOADED) == 0) {
1195 		int param_error;
1196 
1197 		/* Load the physical device parameters. */
1198 		param_error = wd_get_params(wd, &wd->sc_params);
1199 		if (param_error != 0) {
1200 			aprint_error_dev(dksc->sc_dev, "IDENTIFY failed\n");
1201 			error = EIO;
1202 			goto bad;
1203 		}
1204 		wd_set_geometry(wd);
1205 		wd->sc_flags |= WDF_LOADED;
1206 	}
1207 
1208 	wd->sc_flags |= WDF_OPEN;
1209 	return 0;
1210 
1211 bad:
1212 	wd->atabus->ata_delref(wd->drvp);
1213 	return error;
1214 }
1215 
1216 /*
1217  * Caller must hold wd->sc_dk.dk_openlock.
1218  */
1219 static int
1220 wd_lastclose(device_t self)
1221 {
1222 	struct wd_softc *wd = device_private(self);
1223 
1224 	KASSERTMSG(bufq_peek(wd->sc_dksc.sc_bufq) == NULL, "bufq not empty");
1225 
1226 	if (wd->sc_flags & WDF_DIRTY)
1227 		wd_flushcache(wd, AT_WAIT);
1228 
1229 	wd->atabus->ata_delref(wd->drvp);
1230 	wd->sc_flags &= ~WDF_OPEN;
1231 
1232 	return 0;
1233 }
1234 
1235 static int
1236 wdclose(dev_t dev, int flag, int fmt, struct lwp *l)
1237 {
1238 	struct wd_softc *wd;
1239 	struct dk_softc *dksc;
1240 	int unit;
1241 
1242 	unit = WDUNIT(dev);
1243 	wd = device_lookup_private(&wd_cd, unit);
1244 	dksc = &wd->sc_dksc;
1245 
1246 	return dk_close(dksc, dev, flag, fmt, l);
1247 }
1248 
1249 void
1250 wdperror(const struct wd_softc *wd, struct ata_xfer *xfer)
1251 {
1252 	static const char *const errstr0_3[] = {"address mark not found",
1253 	    "track 0 not found", "aborted command", "media change requested",
1254 	    "id not found", "media changed", "uncorrectable data error",
1255 	    "bad block detected"};
1256 	static const char *const errstr4_5[] = {
1257 	    "obsolete (address mark not found)",
1258 	    "no media/write protected", "aborted command",
1259 	    "media change requested", "id not found", "media changed",
1260 	    "uncorrectable data error", "interface CRC error"};
1261 	const char *const *errstr;
1262 	int i;
1263 	const char *sep = "";
1264 
1265 	const struct dk_softc *dksc = &wd->sc_dksc;
1266 	const char *devname = dksc->sc_xname;
1267 	struct ata_drive_datas *drvp = wd->drvp;
1268 	int errno = xfer->c_bio.r_error;
1269 
1270 	if (drvp->ata_vers >= 4)
1271 		errstr = errstr4_5;
1272 	else
1273 		errstr = errstr0_3;
1274 
1275 	printf("%s: (", devname);
1276 
1277 	if (errno == 0)
1278 		printf("error not notified");
1279 
1280 	for (i = 0; i < 8; i++) {
1281 		if (errno & (1 << i)) {
1282 			printf("%s%s", sep, errstr[i]);
1283 			sep = ", ";
1284 		}
1285 	}
1286 	printf(")\n");
1287 }
1288 
1289 int
1290 wdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
1291 {
1292 	struct wd_softc *wd =
1293 	    device_lookup_private(&wd_cd, WDUNIT(dev));
1294 	struct dk_softc *dksc = &wd->sc_dksc;
1295 
1296 	ATADEBUG_PRINT(("wdioctl\n"), DEBUG_FUNCS);
1297 
1298 	if ((wd->sc_flags & WDF_LOADED) == 0)
1299 		return EIO;
1300 
1301 	switch (cmd) {
1302 #ifdef HAS_BAD144_HANDLING
1303 	case DIOCSBAD:
1304 		if ((flag & FWRITE) == 0)
1305 			return EBADF;
1306 		dksc->sc_dkdev.dk_cpulabel->bad = *(struct dkbad *)addr;
1307 		dksc->sc_dkdev.dk_label->d_flags |= D_BADSECT;
1308 		bad144intern(wd);
1309 		return 0;
1310 #endif
1311 #ifdef WD_SOFTBADSECT
1312 	case DIOCBSLIST: {
1313 		uint32_t count, missing, skip;
1314 		struct disk_badsecinfo dbsi;
1315 		struct disk_badsectors *dbs, dbsbuf;
1316 		size_t available;
1317 		uint8_t *laddr;
1318 		int error;
1319 
1320 		dbsi = *(struct disk_badsecinfo *)addr;
1321 		missing = wd->sc_bscount;
1322 		count = 0;
1323 		available = dbsi.dbsi_bufsize;
1324 		skip = dbsi.dbsi_skip;
1325 		laddr = (uint8_t *)dbsi.dbsi_buffer;
1326 
1327 		/*
1328 		 * We start this loop with the expectation that all of the
1329 		 * entries will be missed and decrement this counter each
1330 		 * time we either skip over one (already copied out) or
1331 		 * we actually copy it back to user space.  The structs
1332 		 * holding the bad sector information are copied directly
1333 		 * back to user space whilst the summary is returned via
1334 		 * the struct passed in via the ioctl.
1335 		 */
1336 		error = 0;
1337 		mutex_enter(&wd->sc_lock);
1338 		wd->sc_bslist_inuse++;
1339 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next) {
1340 			if (skip > 0) {
1341 				missing--;
1342 				skip--;
1343 				continue;
1344 			}
1345 			if (available < sizeof(*dbs))
1346 				break;
1347 			available -= sizeof(*dbs);
1348 			memset(&dbsbuf, 0, sizeof(dbsbuf));
1349 			dbsbuf.dbs_min = dbs->dbs_min;
1350 			dbsbuf.dbs_max = dbs->dbs_max;
1351 			dbsbuf.dbs_failedat = dbs->dbs_failedat;
1352 			mutex_exit(&wd->sc_lock);
1353 			error = copyout(&dbsbuf, laddr, sizeof(dbsbuf));
1354 			mutex_enter(&wd->sc_lock);
1355 			if (error)
1356 				break;
1357 			laddr += sizeof(*dbs);
1358 			missing--;
1359 			count++;
1360 		}
1361 		if (--wd->sc_bslist_inuse == 0)
1362 			cv_broadcast(&wd->sc_bslist_cv);
1363 		mutex_exit(&wd->sc_lock);
1364 		dbsi.dbsi_left = missing;
1365 		dbsi.dbsi_copied = count;
1366 		*(struct disk_badsecinfo *)addr = dbsi;
1367 
1368 		/*
1369 		 * If we copied anything out, ignore error and return
1370 		 * success -- can't back it out.
1371 		 */
1372 		return count ? 0 : error;
1373 	}
1374 
1375 	case DIOCBSFLUSH: {
1376 		int error;
1377 
1378 		/* Clean out the bad sector list */
1379 		mutex_enter(&wd->sc_lock);
1380 		while (wd->sc_bslist_inuse) {
1381 			error = cv_wait_sig(&wd->sc_bslist_cv, &wd->sc_lock);
1382 			if (error) {
1383 				mutex_exit(&wd->sc_lock);
1384 				return error;
1385 			}
1386 		}
1387 		while (!SLIST_EMPTY(&wd->sc_bslist)) {
1388 			struct disk_badsectors *dbs =
1389 			    SLIST_FIRST(&wd->sc_bslist);
1390 			SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
1391 			mutex_exit(&wd->sc_lock);
1392 			kmem_free(dbs, sizeof(*dbs));
1393 			mutex_enter(&wd->sc_lock);
1394 		}
1395 		mutex_exit(&wd->sc_lock);
1396 		wd->sc_bscount = 0;
1397 		return 0;
1398 	}
1399 #endif
1400 
1401 #ifdef notyet
1402 	case DIOCWFORMAT:
1403 		if ((flag & FWRITE) == 0)
1404 			return EBADF;
1405 		{
1406 		register struct format_op *fop;
1407 		struct iovec aiov;
1408 		struct uio auio;
1409 		int error1;
1410 
1411 		fop = (struct format_op *)addr;
1412 		aiov.iov_base = fop->df_buf;
1413 		aiov.iov_len = fop->df_count;
1414 		auio.uio_iov = &aiov;
1415 		auio.uio_iovcnt = 1;
1416 		auio.uio_resid = fop->df_count;
1417 		auio.uio_offset =
1418 			fop->df_startblk * wd->sc_dk.dk_label->d_secsize;
1419 		auio.uio_vmspace = l->l_proc->p_vmspace;
1420 		error1 = physio(wdformat, NULL, dev, B_WRITE, wdminphys,
1421 		    &auio);
1422 		fop->df_count -= auio.uio_resid;
1423 		fop->df_reg[0] = wdc->sc_status;
1424 		fop->df_reg[1] = wdc->sc_error;
1425 		return error1;
1426 		}
1427 #endif
1428 	case DIOCGCACHE:
1429 		return wd_getcache(wd, (int *)addr);
1430 
1431 	case DIOCSCACHE:
1432 		return wd_setcache(wd, *(int *)addr);
1433 
1434 	case DIOCCACHESYNC:
1435 		return wd_flushcache(wd, AT_WAIT);
1436 
1437 	case ATAIOCCOMMAND:
1438 		/*
1439 		 * Make sure this command is (relatively) safe first
1440 		 */
1441 		if ((((atareq_t *) addr)->flags & ATACMD_READ) == 0 &&
1442 		    (flag & FWRITE) == 0)
1443 			return (EBADF);
1444 		{
1445 		struct wd_ioctl *wi;
1446 		atareq_t *atareq = (atareq_t *) addr;
1447 		int error1;
1448 
1449 		wi = wi_get(wd);
1450 		wi->wi_atareq = *atareq;
1451 
1452 		if (atareq->datalen && atareq->flags &
1453 		    (ATACMD_READ | ATACMD_WRITE)) {
1454 			void *tbuf;
1455 			if (atareq->datalen < DEV_BSIZE
1456 			    && atareq->command == WDCC_IDENTIFY) {
1457 				tbuf = kmem_zalloc(DEV_BSIZE, KM_SLEEP);
1458 				wi->wi_iov.iov_base = tbuf;
1459 				wi->wi_iov.iov_len = DEV_BSIZE;
1460 				UIO_SETUP_SYSSPACE(&wi->wi_uio);
1461 			} else {
1462 				tbuf = NULL;
1463 				wi->wi_iov.iov_base = atareq->databuf;
1464 				wi->wi_iov.iov_len = atareq->datalen;
1465 				wi->wi_uio.uio_vmspace = l->l_proc->p_vmspace;
1466 			}
1467 			wi->wi_uio.uio_iov = &wi->wi_iov;
1468 			wi->wi_uio.uio_iovcnt = 1;
1469 			wi->wi_uio.uio_resid = atareq->datalen;
1470 			wi->wi_uio.uio_offset = 0;
1471 			wi->wi_uio.uio_rw =
1472 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE;
1473 			error1 = physio(wdioctlstrategy, &wi->wi_bp, dev,
1474 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE,
1475 			    wdminphys, &wi->wi_uio);
1476 			if (tbuf != NULL && error1 == 0) {
1477 				error1 = copyout(tbuf, atareq->databuf,
1478 				    atareq->datalen);
1479 				kmem_free(tbuf, DEV_BSIZE);
1480 			}
1481 		} else {
1482 			/* No need to call physio if we don't have any
1483 			   user data */
1484 			wi->wi_bp.b_flags = 0;
1485 			wi->wi_bp.b_data = 0;
1486 			wi->wi_bp.b_bcount = 0;
1487 			wi->wi_bp.b_dev = dev;
1488 			wi->wi_bp.b_proc = l->l_proc;
1489 			wdioctlstrategy(&wi->wi_bp);
1490 			error1 = wi->wi_bp.b_error;
1491 		}
1492 		*atareq = wi->wi_atareq;
1493 		wi_free(wi);
1494 		return(error1);
1495 		}
1496 
1497 	case DIOCGSECTORALIGN: {
1498 		struct disk_sectoralign *dsa = addr;
1499 		int part = WDPART(dev);
1500 
1501 		*dsa = wd->sc_sectoralign;
1502 		if (part != RAW_PART) {
1503 			struct disklabel *lp = dksc->sc_dkdev.dk_label;
1504 			daddr_t offset = lp->d_partitions[part].p_offset;
1505 			uint32_t r = offset % dsa->dsa_alignment;
1506 
1507 			if (r < dsa->dsa_firstaligned)
1508 				dsa->dsa_firstaligned = dsa->dsa_firstaligned
1509 				    - r;
1510 			else
1511 				dsa->dsa_firstaligned = (dsa->dsa_firstaligned
1512 				    + dsa->dsa_alignment) - r;
1513 		}
1514 
1515 		return 0;
1516 	}
1517 
1518 	default:
1519 		return dk_ioctl(dksc, dev, cmd, addr, flag, l);
1520 	}
1521 
1522 #ifdef DIAGNOSTIC
1523 	panic("wdioctl: impossible");
1524 #endif
1525 }
1526 
1527 static int
1528 wd_discard(device_t dev, off_t pos, off_t len)
1529 {
1530 	struct wd_softc *wd = device_private(dev);
1531 	daddr_t bno;
1532 	long size, done;
1533 	long maxatonce, amount;
1534 	int result;
1535 
1536 	if (!(wd->sc_params.atap_ata_major & WDC_VER_ATA7)
1537 	    || !(wd->sc_params.support_dsm & ATA_SUPPORT_DSM_TRIM)) {
1538 		/* not supported; ignore request */
1539 		ATADEBUG_PRINT(("wddiscard (unsupported)\n"), DEBUG_FUNCS);
1540 		return 0;
1541 	}
1542 	maxatonce = 0xffff; /*wd->sc_params.max_dsm_blocks*/
1543 
1544 	ATADEBUG_PRINT(("wddiscard\n"), DEBUG_FUNCS);
1545 
1546 	if ((wd->sc_flags & WDF_LOADED) == 0)
1547 		return EIO;
1548 
1549 	/* round the start up and the end down */
1550 	bno = (pos + wd->sc_blksize - 1) / wd->sc_blksize;
1551 	size = ((pos + len) / wd->sc_blksize) - bno;
1552 
1553 	done = 0;
1554 	while (done < size) {
1555 	     amount = size - done;
1556 	     if (amount > maxatonce) {
1557 		     amount = maxatonce;
1558 	     }
1559 	     result = wd_trim(wd, bno + done, amount);
1560 	     if (result) {
1561 		     return result;
1562 	     }
1563 	     done += amount;
1564 	}
1565 	return 0;
1566 }
1567 
1568 static int
1569 wddiscard(dev_t dev, off_t pos, off_t len)
1570 {
1571 	struct wd_softc *wd;
1572 	struct dk_softc *dksc;
1573 	int unit;
1574 
1575 	unit = WDUNIT(dev);
1576 	wd = device_lookup_private(&wd_cd, unit);
1577 	dksc = &wd->sc_dksc;
1578 
1579 	return dk_discard(dksc, dev, pos, len);
1580 }
1581 
#ifdef B_FORMAT
/*
 * Format pass-through: tag the buffer as a format request and hand it
 * to the normal strategy routine.
 */
int
wdformat(struct buf *bp)
{

	bp->b_flags |= B_FORMAT;
	return wdstrategy(bp);
}
#endif
1591 
1592 int
1593 wdsize(dev_t dev)
1594 {
1595 	struct wd_softc *wd;
1596 	struct dk_softc *dksc;
1597 	int unit;
1598 
1599 	ATADEBUG_PRINT(("wdsize\n"), DEBUG_FUNCS);
1600 
1601 	unit = WDUNIT(dev);
1602 	wd = device_lookup_private(&wd_cd, unit);
1603 	if (wd == NULL)
1604 		return (-1);
1605 	dksc = &wd->sc_dksc;
1606 
1607 	if (!device_is_active(dksc->sc_dev))
1608 		return (-1);
1609 
1610 	return dk_size(dksc, dev);
1611 }
1612 
1613 /*
1614  * Dump core after a system crash.
1615  */
1616 static int
1617 wddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1618 {
1619 	struct wd_softc *wd;
1620 	struct dk_softc *dksc;
1621 	int unit;
1622 
1623 	/* Check if recursive dump; if so, punt. */
1624 	if (wddoingadump)
1625 		return EFAULT;
1626 	wddoingadump = 1;
1627 
1628 	unit = WDUNIT(dev);
1629 	wd = device_lookup_private(&wd_cd, unit);
1630 	if (wd == NULL)
1631 		return (ENXIO);
1632 	dksc = &wd->sc_dksc;
1633 
1634 	return dk_dump(dksc, dev, blkno, va, size, 0);
1635 }
1636 
/*
 * Write nblk blocks of dump data at blkno, using a statically allocated
 * xfer and polled I/O.  Runs after a crash, so it must not sleep,
 * allocate, or rely on interrupts.
 */
static int
wd_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct wd_softc *wd = device_private(dev);
	struct dk_softc *dksc = &wd->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	struct ata_xfer *xfer = &wd->dump_xfer;
	int err;

	/* Recalibrate, if first dump transfer. */
	if (wddumprecalibrated == 0) {
		wddumprecalibrated = 1;
		ata_channel_lock(wd->drvp->chnl_softc);
		/* This will directly execute the reset due to AT_POLL */
		ata_thread_run(wd->drvp->chnl_softc, AT_POLL,
		    ATACH_TH_DRIVE_RESET, wd->drvp->drive);

		wd->drvp->state = RESET;
		ata_channel_unlock(wd->drvp->chnl_softc);
	}

	/* Reuse the embedded dump xfer; no allocation at dump time. */
	memset(xfer, 0, sizeof(*xfer));
	xfer->c_flags |= C_PRIVATE_ALLOC | C_SKIP_QUEUE;

	xfer->c_bio.blkno = blkno;
	xfer->c_bio.flags = ATA_POLL;
	/* Only use LBA48 addressing when the range actually requires it. */
	if (wd->sc_flags & WDF_LBA48 &&
	    (xfer->c_bio.blkno + nblk) > wd->sc_capacity28)
		xfer->c_bio.flags |= ATA_LBA48;
	if (wd->sc_flags & WDF_LBA)
		xfer->c_bio.flags |= ATA_LBA;
	xfer->c_bio.bcount = nblk * dg->dg_secsize;
	xfer->c_bio.databuf = va;
#ifndef WD_DUMP_NOT_TRUSTED
	/* This will poll until the bio is complete */
	wd->atabus->ata_bio(wd->drvp, xfer);

	switch(err = xfer->c_bio.error) {
	case TIMEOUT:
		printf("wddump: device timed out");
		err = EIO;
		break;
	case ERR_DF:
		printf("wddump: drive fault");
		err = EIO;
		break;
	case ERR_DMA:
		printf("wddump: DMA error");
		err = EIO;
		break;
	case ERROR:
		printf("wddump: ");
		wdperror(wd, xfer);
		err = EIO;
		break;
	case NOERROR:
		err = 0;
		break;
	default:
		panic("wddump: unknown error type %x", err);
	}

	if (err != 0) {
		printf("\n");
		return err;
	}
#else	/* WD_DUMP_NOT_TRUSTED */
	/* Let's just talk about this first... */
	printf("wd%d: dump addr 0x%x, cylin %d, head %d, sector %d\n",
	    unit, va, cylin, head, sector);
	delay(500 * 1000);	/* half a second */
#endif

	wddoingadump = 0;
	return 0;
}
1713 
#ifdef HAS_BAD144_HANDLING
/*
 * Internalize the bad sector table: convert each cyl/track/sector
 * entry from the on-disk dkbad table into an absolute sector number
 * in drvp->badsect, terminating the array with -1 entries.
 */
void
bad144intern(struct wd_softc *wd)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct dkbad *bt = &dksc->sc_dkdev.dk_cpulabel->bad;
	struct disklabel *lp = dksc->sc_dkdev.dk_label;
	int i;

	ATADEBUG_PRINT(("bad144intern\n"), DEBUG_XFERS);

	for (i = 0; i < NBT_BAD; i++) {
		/* 0xffff cylinder marks the end of the on-disk table. */
		if (bt->bt_bad[i].bt_cyl == 0xffff)
			break;
		wd->drvp->badsect[i] =
		    bt->bt_bad[i].bt_cyl * lp->d_secpercyl +
		    (bt->bt_bad[i].bt_trksec >> 8) * lp->d_nsectors +
		    (bt->bt_bad[i].bt_trksec & 0xff);
	}
	/* Fill the remainder (plus sentinel slot) with -1. */
	while (i < NBT_BAD + 1)
		wd->drvp->badsect[i++] = -1;
}
#endif
1740 
/*
 * Publish the drive's capacity and geometry to the disk(9) layer.
 */
static void
wd_set_geometry(struct wd_softc *wd)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;

	memset(dg, 0, sizeof(*dg));

	dg->dg_secperunit = wd->sc_capacity;
	dg->dg_secsize = wd->sc_blksize;
	dg->dg_nsectors = wd->sc_params.atap_sectors;
	dg->dg_ntracks = wd->sc_params.atap_heads;
	/* A cylinder count is only meaningful for CHS (non-LBA) drives. */
	if ((wd->sc_flags & WDF_LBA) == 0)
		dg->dg_ncylinders = wd->sc_params.atap_cylinders;

	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, wd->sc_typename);
}
1758 
/*
 * Issue IDENTIFY and fill in *params.  On a first failure the drive is
 * reset and the IDENTIFY retried once; pre-ATA (ST506-class) drives
 * that never answer IDENTIFY get a fabricated geometry instead.
 *
 * Returns 0 on success (including the fabricated case), 1 on failure.
 */
int
wd_get_params(struct wd_softc *wd, struct ataparams *params)
{
	int retry = 0;
	struct ata_channel *chp = wd->drvp->chnl_softc;
	const int flags = AT_WAIT;

again:
	switch (wd->atabus->ata_get_params(wd->drvp, flags, params)) {
	case CMD_AGAIN:
		return 1;
	case CMD_ERR:
		/* Reset the drive and retry the IDENTIFY exactly once. */
		if (retry == 0) {
			retry++;
			ata_channel_lock(chp);
			(*wd->atabus->ata_reset_drive)(wd->drvp, flags, NULL);
			ata_channel_unlock(chp);
			goto again;
		}

		if (wd->drvp->drive_type != ATA_DRIVET_OLD)
			return 1;
		/*
		 * We `know' there's a drive here; just assume it's old.
		 * This geometry is only used to read the MBR and print a
		 * (false) attach message.
		 */
		strncpy(params->atap_model, "ST506",
		    sizeof params->atap_model);
		params->atap_config = ATA_CFG_FIXED;
		params->atap_cylinders = 1024;
		params->atap_heads = 8;
		params->atap_sectors = 17;
		params->atap_multi = 1;
		params->atap_capabilities1 = params->atap_capabilities2 = 0;
		wd->drvp->ata_vers = -1; /* Mark it as pre-ATA */
		/* FALLTHROUGH */
	case CMD_OK:
		return 0;
	default:
		panic("wd_get_params: bad return code from ata_get_params");
		/* NOTREACHED */
	}
}
1803 
1804 int
1805 wd_getcache(struct wd_softc *wd, int *bitsp)
1806 {
1807 	struct ataparams params;
1808 
1809 	if (wd_get_params(wd, &params) != 0)
1810 		return EIO;
1811 	if (params.atap_cmd_set1 == 0x0000 ||
1812 	    params.atap_cmd_set1 == 0xffff ||
1813 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0) {
1814 		*bitsp = 0;
1815 		return 0;
1816 	}
1817 	*bitsp = DKCACHE_WCHANGE | DKCACHE_READ;
1818 	if (params.atap_cmd1_en & WDC_CMD1_CACHE)
1819 		*bitsp |= DKCACHE_WRITE;
1820 
1821 	if (WD_USE_NCQ(wd) || (wd->drvp->drive_flags & ATA_DRIVE_WFUA))
1822 		*bitsp |= DKCACHE_FUA;
1823 
1824 	return 0;
1825 }
1826 
1827 
1828 static int
1829 wd_check_error(const struct dk_softc *dksc, const struct ata_xfer *xfer,
1830     const char *func)
1831 {
1832 	static const char at_errbits[] = "\20\10ERROR\11TIMEOU\12DF";
1833 
1834 	int flags = xfer->c_ata_c.flags;
1835 
1836 	if ((flags & AT_ERROR) != 0 && xfer->c_ata_c.r_error == WDCE_ABRT) {
1837 		/* command not supported */
1838 		aprint_debug_dev(dksc->sc_dev, "%s: not supported\n", func);
1839 		return ENODEV;
1840 	}
1841 	if (flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1842 		char sbuf[sizeof(at_errbits) + 64];
1843 		snprintb(sbuf, sizeof(sbuf), at_errbits, flags);
1844 		aprint_error_dev(dksc->sc_dev, "%s: status=%s\n", func, sbuf);
1845 		return EIO;
1846 	}
1847 	return 0;
1848 }
1849 
/*
 * Enable or disable the drive's write cache (DIOCSCACHE) via
 * SET FEATURES.  Only the write-cache bit is changeable; the read
 * cache cannot be turned off and settings cannot be saved.
 */
int
wd_setcache(struct wd_softc *wd, int bits)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct ataparams params;
	struct ata_xfer *xfer;
	int error;

	if (wd_get_params(wd, &params) != 0)
		return EIO;

	/* Reject drives whose command-set word is invalid or cache-less. */
	if (params.atap_cmd_set1 == 0x0000 ||
	    params.atap_cmd_set1 == 0xffff ||
	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0)
		return EOPNOTSUPP;

	/* Read cache can't be disabled; settings can't be persisted. */
	if ((bits & DKCACHE_READ) == 0 ||
	    (bits & DKCACHE_SAVE) != 0)
		return EOPNOTSUPP;

	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);

	xfer->c_ata_c.r_command = SET_FEATURES;
	xfer->c_ata_c.r_st_bmask = 0;
	xfer->c_ata_c.r_st_pmask = 0;
	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
	xfer->c_ata_c.flags = AT_WAIT;
	if (bits & DKCACHE_WRITE)
		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_EN;
	else
		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_DS;

	wd->atabus->ata_exec_command(wd->drvp, xfer);
	ata_wait_cmd(wd->drvp->chnl_softc, xfer);

	error = wd_check_error(dksc, xfer, __func__);
	ata_free_xfer(wd->drvp->chnl_softc, xfer);
	return error;
}
1889 
/*
 * Issue STANDBY IMMEDIATE to spin the drive down, e.g. before a
 * power-off.  `flags' selects polled vs. waiting execution (AT_POLL
 * from shutdown context, AT_WAIT otherwise).
 */
static int
wd_standby(struct wd_softc *wd, int flags)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct ata_xfer *xfer;
	int error;

	aprint_debug_dev(dksc->sc_dev, "standby immediate\n");
	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);

	xfer->c_ata_c.r_command = WDCC_STANDBY_IMMED;
	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
	xfer->c_ata_c.flags = flags;
	xfer->c_ata_c.timeout = 30000; /* 30s timeout */

	wd->atabus->ata_exec_command(wd->drvp, xfer);
	ata_wait_cmd(wd->drvp->chnl_softc, xfer);

	error = wd_check_error(dksc, xfer, __func__);
	ata_free_xfer(wd->drvp->chnl_softc, xfer);
	return error;
}
1913 
/*
 * Flush the drive's write cache with FLUSH CACHE (EXT where enabled),
 * clearing WDF_DIRTY on completion.  `flags' selects polled vs.
 * waiting execution.
 */
int
wd_flushcache(struct wd_softc *wd, int flags)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct ata_xfer *xfer;
	int error;

	/*
	 * WDCC_FLUSHCACHE is here since ATA-4, but some drives report
	 * only ATA-2 and still support it.
	 */
	if (wd->drvp->ata_vers < 4 &&
	    ((wd->sc_params.atap_cmd_set2 & WDC_CMD2_FC) == 0 ||
	    wd->sc_params.atap_cmd_set2 == 0xffff))
		return ENODEV;

	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);

	/* Use the 48-bit variant when both LBA48 and FCE are enabled. */
	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0 &&
	    (wd->sc_params.atap_cmd2_en & ATA_CMD2_FCE) != 0) {
		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE_EXT;
		flags |= AT_LBA48;
	} else
		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE;
	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
	xfer->c_ata_c.flags = flags | AT_READREG;
	xfer->c_ata_c.timeout = 300000; /* 5m timeout */

	wd->atabus->ata_exec_command(wd->drvp, xfer);
	ata_wait_cmd(wd->drvp->chnl_softc, xfer);

	error = wd_check_error(dksc, xfer, __func__);
	wd->sc_flags &= ~WDF_DIRTY;
	ata_free_xfer(wd->drvp->chnl_softc, xfer);
	return error;
}
1951 
1952 /*
1953  * Execute TRIM command, assumes sleep context.
1954  */
1955 static int
1956 wd_trim(struct wd_softc *wd, daddr_t bno, long size)
1957 {
1958 	struct dk_softc *dksc = &wd->sc_dksc;
1959 	struct ata_xfer *xfer;
1960 	int error;
1961 	unsigned char *req;
1962 
1963 	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);
1964 
1965 	req = kmem_zalloc(512, KM_SLEEP);
1966 	req[0] = bno & 0xff;
1967 	req[1] = (bno >> 8) & 0xff;
1968 	req[2] = (bno >> 16) & 0xff;
1969 	req[3] = (bno >> 24) & 0xff;
1970 	req[4] = (bno >> 32) & 0xff;
1971 	req[5] = (bno >> 40) & 0xff;
1972 	req[6] = size & 0xff;
1973 	req[7] = (size >> 8) & 0xff;
1974 
1975 	/*
1976  	 * XXX We could possibly use NCQ TRIM, which supports executing
1977  	 * this command concurrently. It would need some investigation, some
1978  	 * early or not so early disk firmware caused data loss with NCQ TRIM.
1979 	 * atastart() et.al would need to be adjusted to allow and support
1980 	 * running several non-I/O ATA commands in parallel.
1981 	 */
1982 
1983 	xfer->c_ata_c.r_command = ATA_DATA_SET_MANAGEMENT;
1984 	xfer->c_ata_c.r_count = 1;
1985 	xfer->c_ata_c.r_features = ATA_SUPPORT_DSM_TRIM;
1986 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1987 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1988 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1989 	xfer->c_ata_c.data = req;
1990 	xfer->c_ata_c.bcount = 512;
1991 	xfer->c_ata_c.flags |= AT_WRITE | AT_WAIT;
1992 
1993 	wd->atabus->ata_exec_command(wd->drvp, xfer);
1994 	ata_wait_cmd(wd->drvp->chnl_softc, xfer);
1995 
1996 	kmem_free(req, 512);
1997 	error = wd_check_error(dksc, xfer, __func__);
1998 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1999 	return error;
2000 }
2001 
2002 bool
2003 wd_shutdown(device_t dev, int how)
2004 {
2005 	struct wd_softc *wd = device_private(dev);
2006 
2007 	/* the adapter needs to be enabled */
2008 	if (wd->atabus->ata_addref(wd->drvp))
2009 		return true; /* no need to complain */
2010 
2011 	wd_flushcache(wd, AT_POLL);
2012 	if ((how & RB_POWERDOWN) == RB_POWERDOWN)
2013 		wd_standby(wd, AT_POLL);
2014 	return true;
2015 }
2016 
2017 /*
2018  * Allocate space for a ioctl queue structure.  Mostly taken from
2019  * scsipi_ioctl.c
2020  */
2021 struct wd_ioctl *
2022 wi_get(struct wd_softc *wd)
2023 {
2024 	struct wd_ioctl *wi;
2025 
2026 	wi = kmem_zalloc(sizeof(struct wd_ioctl), KM_SLEEP);
2027 	wi->wi_softc = wd;
2028 	buf_init(&wi->wi_bp);
2029 
2030 	return (wi);
2031 }
2032 
2033 /*
2034  * Free an ioctl structure and remove it from our list
2035  */
2036 
2037 void
2038 wi_free(struct wd_ioctl *wi)
2039 {
2040 	buf_destroy(&wi->wi_bp);
2041 	kmem_free(wi, sizeof(*wi));
2042 }
2043 
2044 /*
2045  * Find a wd_ioctl structure based on the struct buf.
2046  */
2047 
2048 struct wd_ioctl *
2049 wi_find(struct buf *bp)
2050 {
2051 	return container_of(bp, struct wd_ioctl, wi_bp);
2052 }
2053 
/*
 * Return the transfer granularity for an ioctl pass-through command:
 * the drive's logical sector size for the known data R/W commands,
 * 512 bytes for everything else (e.g. IDENTIFY, SMART data).
 */
static uint
wi_sector_size(const struct wd_ioctl * const wi)
{
	switch (wi->wi_atareq.command) {
	case WDCC_READ:
	case WDCC_WRITE:
	case WDCC_READMULTI:
	case WDCC_WRITEMULTI:
	case WDCC_READDMA:
	case WDCC_WRITEDMA:
	case WDCC_READ_EXT:
	case WDCC_WRITE_EXT:
	case WDCC_READMULTI_EXT:
	case WDCC_WRITEMULTI_EXT:
	case WDCC_READDMA_EXT:
	case WDCC_WRITEDMA_EXT:
	case WDCC_READ_FPDMA_QUEUED:
	case WDCC_WRITE_FPDMA_QUEUED:
		return wi->wi_softc->sc_blksize;
	default:
		/* Non-I/O commands transfer in fixed 512-byte units. */
		return 512;
	}
}
2077 
2078 /*
2079  * Ioctl pseudo strategy routine
2080  *
2081  * This is mostly stolen from scsipi_ioctl.c:scsistrategy().  What
2082  * happens here is:
2083  *
2084  * - wdioctl() queues a wd_ioctl structure.
2085  *
2086  * - wdioctl() calls physio/wdioctlstrategy based on whether or not
2087  *   user space I/O is required.  If physio() is called, physio() eventually
2088  *   calls wdioctlstrategy().
2089  *
2090  * - In either case, wdioctlstrategy() calls wd->atabus->ata_exec_command()
2091  *   to perform the actual command
2092  *
2093  * The reason for the use of the pseudo strategy routine is because
2094  * when doing I/O to/from user space, physio _really_ wants to be in
2095  * the loop.  We could put the entire buffer into the ioctl request
2096  * structure, but that won't scale if we want to do things like download
2097  * microcode.
2098  */
2099 
/*
 * Execute the ATA command described by the wd_ioctl enclosing bp, then
 * complete bp with biodone().  See the block comment above for how this
 * fits into the wdioctl()/physio() flow.
 */
void
wdioctlstrategy(struct buf *bp)
{
	struct wd_ioctl *wi;
	struct ata_xfer *xfer;
	int error = 0;

	wi = wi_find(bp);
	if (wi == NULL) {
		printf("wdioctlstrategy: "
		    "No matching ioctl request found in queue\n");
		error = EINVAL;
		goto out2;
	}

	/* NOTE(review): 'true' presumably allows sleeping for a free xfer
	 * slot — confirm against ata_get_xfer(). */
	xfer = ata_get_xfer(wi->wi_softc->drvp->chnl_softc, true);

	/*
	 * Abort if physio broke up the transfer
	 */

	if (bp->b_bcount != wi->wi_atareq.datalen) {
		printf("physio split wd ioctl request... cannot proceed\n");
		error = EIO;
		goto out;
	}

	/*
	 * Abort if we didn't get a buffer size that was a multiple of
	 * our sector size (or overflows CHS/LBA28 sector count)
	 */

	/* (1 << NBBY) == 256, the maximum 8-bit sector count. */
	if ((bp->b_bcount % wi_sector_size(wi)) != 0 ||
	    (bp->b_bcount / wi_sector_size(wi)) >=
	     (1 << NBBY)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure a timeout was supplied in the ioctl request
	 */

	if (wi->wi_atareq.timeout == 0) {
		error = EINVAL;
		goto out;
	}

	/* Translate the user-visible ATACMD_* flags into AT_* xfer flags. */
	if (wi->wi_atareq.flags & ATACMD_READ)
		xfer->c_ata_c.flags |= AT_READ;
	else if (wi->wi_atareq.flags & ATACMD_WRITE)
		xfer->c_ata_c.flags |= AT_WRITE;

	if (wi->wi_atareq.flags & ATACMD_READREG)
		xfer->c_ata_c.flags |= AT_READREG;

	if ((wi->wi_atareq.flags & ATACMD_LBA) != 0)
		xfer->c_ata_c.flags |= AT_LBA;

	/* Run the command synchronously; we wait for completion below. */
	xfer->c_ata_c.flags |= AT_WAIT;

	/* Assemble a 28-bit LBA from the head nibble, cylinder and sector. */
	xfer->c_ata_c.timeout = wi->wi_atareq.timeout;
	xfer->c_ata_c.r_command = wi->wi_atareq.command;
	xfer->c_ata_c.r_lba = ((wi->wi_atareq.head & 0x0f) << 24) |
	    (wi->wi_atareq.cylinder << 8) |
	    wi->wi_atareq.sec_num;
	xfer->c_ata_c.r_count = wi->wi_atareq.sec_count;
	xfer->c_ata_c.r_features = wi->wi_atareq.features;
	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
	xfer->c_ata_c.data = wi->wi_bp.b_data;
	xfer->c_ata_c.bcount = wi->wi_bp.b_bcount;

	wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, xfer);
	ata_wait_cmd(wi->wi_softc->drvp->chnl_softc, xfer);

	/* Map the command result onto the ioctl's retsts field. */
	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
		if (xfer->c_ata_c.flags & AT_ERROR) {
			wi->wi_atareq.retsts = ATACMD_ERROR;
			wi->wi_atareq.error = xfer->c_ata_c.r_error;
		} else if (xfer->c_ata_c.flags & AT_DF)
			wi->wi_atareq.retsts = ATACMD_DF;
		else
			wi->wi_atareq.retsts = ATACMD_TIMEOUT;
	} else {
		wi->wi_atareq.retsts = ATACMD_OK;
		if (wi->wi_atareq.flags & ATACMD_READREG) {
			/* Copy the taskfile result registers back to the
			 * caller, decomposing the LBA into CHS fields. */
			wi->wi_atareq.command = xfer->c_ata_c.r_status;
			wi->wi_atareq.features = xfer->c_ata_c.r_error;
			wi->wi_atareq.sec_count = xfer->c_ata_c.r_count;
			wi->wi_atareq.sec_num = xfer->c_ata_c.r_lba & 0xff;
			wi->wi_atareq.head = (xfer->c_ata_c.r_device & 0xf0) |
			    ((xfer->c_ata_c.r_lba >> 24) & 0x0f);
			wi->wi_atareq.cylinder =
			    (xfer->c_ata_c.r_lba >> 8) & 0xffff;
			wi->wi_atareq.error = xfer->c_ata_c.r_error;
		}
	}

out:
	ata_free_xfer(wi->wi_softc->drvp->chnl_softc, xfer);
out2:
	bp->b_error = error;
	if (error)
		bp->b_resid = bp->b_bcount;
	biodone(bp);
}
2207 
2208 static void
2209 wd_sysctl_attach(struct wd_softc *wd)
2210 {
2211 	struct dk_softc *dksc = &wd->sc_dksc;
2212 	const struct sysctlnode *node;
2213 	int error;
2214 
2215 	/* sysctl set-up */
2216 	if (sysctl_createv(&wd->nodelog, 0, NULL, &node,
2217 				0, CTLTYPE_NODE, dksc->sc_xname,
2218 				SYSCTL_DESCR("wd driver settings"),
2219 				NULL, 0, NULL, 0,
2220 				CTL_HW, CTL_CREATE, CTL_EOL) != 0) {
2221 		aprint_error_dev(dksc->sc_dev,
2222 		    "could not create %s.%s sysctl node\n",
2223 		    "hw", dksc->sc_xname);
2224 		return;
2225 	}
2226 
2227 	wd->drv_ncq = true;
2228 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2229 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq",
2230 				SYSCTL_DESCR("use NCQ if supported"),
2231 				NULL, 0, &wd->drv_ncq, 0,
2232 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2233 				!= 0) {
2234 		aprint_error_dev(dksc->sc_dev,
2235 		    "could not create %s.%s.use_ncq sysctl - error %d\n",
2236 		    "hw", dksc->sc_xname, error);
2237 		return;
2238 	}
2239 
2240 	wd->drv_ncq_prio = false;
2241 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2242 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq_prio",
2243 				SYSCTL_DESCR("use NCQ PRIORITY if supported"),
2244 				NULL, 0, &wd->drv_ncq_prio, 0,
2245 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2246 				!= 0) {
2247 		aprint_error_dev(dksc->sc_dev,
2248 		    "could not create %s.%s.use_ncq_prio sysctl - error %d\n",
2249 		    "hw", dksc->sc_xname, error);
2250 		return;
2251 	}
2252 
2253 #ifdef WD_CHAOS_MONKEY
2254 	wd->drv_chaos_freq = 0;
2255 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2256 				CTLFLAG_READWRITE, CTLTYPE_INT, "chaos_freq",
2257 				SYSCTL_DESCR("simulated bio read error rate"),
2258 				NULL, 0, &wd->drv_chaos_freq, 0,
2259 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2260 				!= 0) {
2261 		aprint_error_dev(dksc->sc_dev,
2262 		    "could not create %s.%s.chaos_freq sysctl - error %d\n",
2263 		    "hw", dksc->sc_xname, error);
2264 		return;
2265 	}
2266 
2267 	wd->drv_chaos_cnt = 0;
2268 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2269 				CTLFLAG_READONLY, CTLTYPE_INT, "chaos_cnt",
2270 				SYSCTL_DESCR("number of processed bio reads"),
2271 				NULL, 0, &wd->drv_chaos_cnt, 0,
2272 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2273 				!= 0) {
2274 		aprint_error_dev(dksc->sc_dev,
2275 		    "could not create %s.%s.chaos_cnt sysctl - error %d\n",
2276 		    "hw", dksc->sc_xname, error);
2277 		return;
2278 	}
2279 #endif
2280 
2281 }
2282 
static void
wd_sysctl_detach(struct wd_softc *wd)
{
	/* Destroy every sysctl node logged during wd_sysctl_attach(). */
	sysctl_teardown(&wd->nodelog);
}
2288 
#ifdef ATADEBUG
int wddebug(void);

/*
 * Debugger helper: for every attached wd(4) unit, print the softc flags
 * and the head of the buffer queue, then dump the underlying ATA
 * channel state.  Previously only units 0-3 were examined; iterate over
 * the cfdriver's full device table instead.
 */
int
wddebug(void)
{
	struct wd_softc *wd;
	struct dk_softc *dksc;
	int unit;

	for (unit = 0; unit < wd_cd.cd_ndevs; unit++) {
		wd = device_lookup_private(&wd_cd, unit);
		if (wd == NULL)
			continue;
		dksc = &wd->sc_dksc;
		printf("%s fl %x bufq %p:\n",
		    dksc->sc_xname, wd->sc_flags, bufq_peek(dksc->sc_bufq));

		atachannel_debug(wd->drvp->chnl_softc);
	}
	return 0;
}
#endif /* ATADEBUG */
2312