xref: /netbsd-src/sys/dev/ata/wd.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: wd.c,v 1.439 2018/06/03 18:38:35 jdolecek Exp $ */
2 
3 /*
4  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *	notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *	notice, this list of conditions and the following disclaimer in the
13  *	documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*-
28  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
29  * All rights reserved.
30  *
31  * This code is derived from software contributed to The NetBSD Foundation
32  * by Charles M. Hannum and by Onno van der Linden.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
44  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
45  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
46  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
47  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
48  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
49  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
50  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
51  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
52  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
53  * POSSIBILITY OF SUCH DAMAGE.
54  */
55 
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.439 2018/06/03 18:38:35 jdolecek Exp $");
58 
59 #include "opt_ata.h"
60 #include "opt_wd.h"
61 
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/kernel.h>
65 #include <sys/conf.h>
66 #include <sys/file.h>
67 #include <sys/stat.h>
68 #include <sys/ioctl.h>
69 #include <sys/buf.h>
70 #include <sys/bufq.h>
71 #include <sys/uio.h>
72 #include <sys/malloc.h>
73 #include <sys/device.h>
74 #include <sys/disklabel.h>
75 #include <sys/disk.h>
76 #include <sys/syslog.h>
77 #include <sys/proc.h>
78 #include <sys/reboot.h>
79 #include <sys/vnode.h>
80 #include <sys/rndsource.h>
81 
82 #include <sys/intr.h>
83 #include <sys/bus.h>
84 
85 #include <dev/ata/atareg.h>
86 #include <dev/ata/atavar.h>
87 #include <dev/ata/wdvar.h>
88 #include <dev/ic/wdcreg.h>
89 #include <sys/ataio.h>
90 #include "locators.h"
91 
92 #include <prop/proplib.h>
93 
/*
 * Retry policy for failed bio transfers.
 *
 * WDIORETRIES_SINGLE: once a transfer has been retried this many times,
 *	wdstart1() falls back to single-sector, non-NCQ mode (unless FUA
 *	was requested).
 * WDIORETRIES: a transfer is failed once it has been retried this often.
 * RECOVERYTIME: delay, in ticks, before a failed command is reissued.
 *	Parenthesized so that it can be used safely inside any expression.
 */
#define	WDIORETRIES_SINGLE 4	/* number of retries for single-sector */
#define	WDIORETRIES	5	/* number of retries before giving up */
#define	RECOVERYTIME	(hz / 2)	/* time to wait before retrying a cmd */

/* Thin aliases mapping the generic disk unit/partition macros to wd(4). */
#define	WDUNIT(dev)		DISKUNIT(dev)
#define	WDPART(dev)		DISKPART(dev)
#define	WDMINOR(unit, part)	DISKMINOR(unit, part)
#define	MAKEWDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)

/* Device number of the raw partition on the same unit as `dev'. */
#define	WDLABELDEV(dev)	(MAKEWDDEV(major(dev), WDUNIT(dev), RAW_PART))
104 
/* Debug categories; OR-ed together to form the mask tested below. */
#define DEBUG_FUNCS  0x08
#define DEBUG_PROBE  0x10
#define DEBUG_DETACH 0x20
#define	DEBUG_XFERS  0x40
#ifdef ATADEBUG
#ifndef ATADEBUG_WD_MASK
#define ATADEBUG_WD_MASK 0x0
#endif
int wdcdebug_wd_mask = ATADEBUG_WD_MASK;
/*
 * Print `args' (a parenthesized printf argument list) when any bit of
 * `level' is enabled in wdcdebug_wd_mask.  Wrapped in do/while so the
 * macro behaves as a single statement and cannot capture a following
 * `else' of the caller.
 */
#define ATADEBUG_PRINT(args, level) \
	do { \
		if (wdcdebug_wd_mask & (level)) \
			printf args; \
	} while (/*CONSTCOND*/0)
#else
#define ATADEBUG_PRINT(args, level)
#endif
120 
121 static int	wdprobe(device_t, cfdata_t, void *);
122 static void	wdattach(device_t, device_t, void *);
123 static int	wddetach(device_t, int);
124 static void	wdperror(const struct wd_softc *, struct ata_xfer *);
125 
126 static void	wdminphys(struct buf *);
127 
128 static int	wd_firstopen(device_t, dev_t, int, int);
129 static int	wd_lastclose(device_t);
130 static bool	wd_suspend(device_t, const pmf_qual_t *);
131 static int	wd_standby(struct wd_softc *, int);
132 
133 CFATTACH_DECL3_NEW(wd, sizeof(struct wd_softc),
134     wdprobe, wdattach, wddetach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
135 
136 extern struct cfdriver wd_cd;
137 
138 static dev_type_open(wdopen);
139 static dev_type_close(wdclose);
140 static dev_type_read(wdread);
141 static dev_type_write(wdwrite);
142 static dev_type_ioctl(wdioctl);
143 static dev_type_strategy(wdstrategy);
144 static dev_type_dump(wddump);
145 static dev_type_size(wdsize);
146 static dev_type_discard(wddiscard);
147 
148 const struct bdevsw wd_bdevsw = {
149 	.d_open = wdopen,
150 	.d_close = wdclose,
151 	.d_strategy = wdstrategy,
152 	.d_ioctl = wdioctl,
153 	.d_dump = wddump,
154 	.d_psize = wdsize,
155 	.d_discard = wddiscard,
156 	.d_flag = D_DISK
157 };
158 
159 const struct cdevsw wd_cdevsw = {
160 	.d_open = wdopen,
161 	.d_close = wdclose,
162 	.d_read = wdread,
163 	.d_write = wdwrite,
164 	.d_ioctl = wdioctl,
165 	.d_stop = nostop,
166 	.d_tty = notty,
167 	.d_poll = nopoll,
168 	.d_mmap = nommap,
169 	.d_kqfilter = nokqfilter,
170 	.d_discard = wddiscard,
171 	.d_flag = D_DISK
172 };
173 
/* #define WD_DUMP_NOT_TRUSTED if you just want to watch */

/*
 * Crash-dump state.  While wddoingadump is non-zero, wddone() simply
 * releases completed transfers instead of doing normal completion work.
 * Both flags start out as zero (static storage).
 */
static int wddoingadump;
static int wddumprecalibrated;
177 
178 /*
179  * Glue necessary to hook WDCIOCCOMMAND into physio
180  */
181 
182 struct wd_ioctl {
183 	LIST_ENTRY(wd_ioctl) wi_list;
184 	struct buf wi_bp;
185 	struct uio wi_uio;
186 	struct iovec wi_iov;
187 	atareq_t wi_atareq;
188 	struct wd_softc *wi_softc;
189 };
190 
191 static struct	wd_ioctl *wi_find(struct buf *);
192 static void	wi_free(struct wd_ioctl *);
193 static struct	wd_ioctl *wi_get(struct wd_softc *);
194 static void	wdioctlstrategy(struct buf *);
195 
196 static void	wdstart(device_t);
197 static void	wdstart1(struct wd_softc *, struct buf *, struct ata_xfer *);
198 static int	wd_diskstart(device_t, struct buf *);
199 static int	wd_dumpblocks(device_t, void *, daddr_t, int);
200 static void	wd_iosize(device_t, int *);
201 static int	wd_discard(device_t, off_t, off_t);
202 static void	wdbiorestart(void *);
203 static void	wddone(device_t, struct ata_xfer *);
204 static int	wd_get_params(struct wd_softc *, uint8_t, struct ataparams *);
205 static void	wd_set_geometry(struct wd_softc *);
206 static int	wd_flushcache(struct wd_softc *, int, bool);
207 static int	wd_trim(struct wd_softc *, daddr_t, long);
208 static bool	wd_shutdown(device_t, int);
209 
210 static int wd_getcache(struct wd_softc *, int *);
211 static int wd_setcache(struct wd_softc *, int);
212 
213 static void wd_sysctl_attach(struct wd_softc *);
214 static void wd_sysctl_detach(struct wd_softc *);
215 
216 struct dkdriver wddkdriver = {
217 	.d_open = wdopen,
218 	.d_close = wdclose,
219 	.d_strategy = wdstrategy,
220 	.d_minphys = wdminphys,
221 	.d_diskstart = wd_diskstart,
222 	.d_dumpblocks = wd_dumpblocks,
223 	.d_iosize = wd_iosize,
224 	.d_firstopen = wd_firstopen,
225 	.d_lastclose = wd_lastclose,
226 	.d_discard = wd_discard
227 };
228 
229 #ifdef HAS_BAD144_HANDLING
230 static void bad144intern(struct wd_softc *);
231 #endif
232 
/* Quirk bits stored in the softc's sc_quirks field. */
#define	WD_QUIRK_SPLIT_MOD15_WRITE	0x0001	/* must split certain writes */

/* snprintb() format used by wdattach() to print the quirk bits. */
#define	WD_QUIRK_FMT "\20\1SPLIT_MOD15_WRITE\2FORCE_LBA48"

/*
 * Quirk table for IDE drives.  Put more-specific matches first, since
 * a simple globbing routine is used for matching.  The table ends with
 * an entry whose wdq_match is NULL.
 */
static const struct wd_quirk {
	const char *wdq_match;		/* inquiry pattern to match */
	int wdq_quirks;			/* drive quirks */
} wd_quirk_table[] = {
	/*
	 * Some Seagate S-ATA drives have a PHY which can get confused
	 * with the way data is packetized by some S-ATA controllers.
	 *
	 * The work-around is to split in two any write transfer whose
	 * sector count % 15 == 1 (assuming 512 byte sectors).
	 *
	 * XXX This is an incomplete list.  There are at least a couple
	 * XXX more model numbers.  If you have trouble with such transfers
	 * XXX (8K is the most common) on Seagate S-ATA drives, please
	 * XXX notify thorpej@NetBSD.org.
	 *
	 * The ST360015AS has not yet been confirmed to have this
	 * issue; however, it is the only other drive in the
	 * Seagate Barracuda Serial ATA V family.
	 */
	{
		.wdq_match = "ST3120023AS",
		.wdq_quirks = WD_QUIRK_SPLIT_MOD15_WRITE,
	},
	{
		.wdq_match = "ST380023AS",
		.wdq_quirks = WD_QUIRK_SPLIT_MOD15_WRITE,
	},
	{
		.wdq_match = "ST360015AS",
		.wdq_quirks = WD_QUIRK_SPLIT_MOD15_WRITE,
	},
	/* terminator */
	{
		.wdq_match = NULL,
		.wdq_quirks = 0,
	},
};
271 
272 static const struct wd_quirk *
273 wd_lookup_quirks(const char *name)
274 {
275 	const struct wd_quirk *wdq;
276 	const char *estr;
277 
278 	for (wdq = wd_quirk_table; wdq->wdq_match != NULL; wdq++) {
279 		/*
280 		 * We only want exact matches (which include matches
281 		 * against globbing characters).
282 		 */
283 		if (pmatch(name, wdq->wdq_match, &estr) == 2)
284 			return (wdq);
285 	}
286 	return (NULL);
287 }
288 
289 static int
290 wdprobe(device_t parent, cfdata_t match, void *aux)
291 {
292 	struct ata_device *adev = aux;
293 
294 	if (adev == NULL)
295 		return 0;
296 	if (adev->adev_bustype->bustype_type != SCSIPI_BUSTYPE_ATA)
297 		return 0;
298 
299 	if (match->cf_loc[ATA_HLCF_DRIVE] != ATA_HLCF_DRIVE_DEFAULT &&
300 	    match->cf_loc[ATA_HLCF_DRIVE] != adev->adev_drv_data->drive)
301 		return 0;
302 	return 1;
303 }
304 
305 static void
306 wdattach(device_t parent, device_t self, void *aux)
307 {
308 	struct wd_softc *wd = device_private(self);
309 	struct dk_softc *dksc = &wd->sc_dksc;
310 	struct ata_device *adev= aux;
311 	int i, blank;
312 	char tbuf[41], pbuf[9], c, *p, *q;
313 	const struct wd_quirk *wdq;
314 	int dtype = DKTYPE_UNKNOWN;
315 
316 	dksc->sc_dev = self;
317 
318 	ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE);
319 	mutex_init(&wd->sc_lock, MUTEX_DEFAULT, IPL_BIO);
320 #ifdef WD_SOFTBADSECT
321 	SLIST_INIT(&wd->sc_bslist);
322 #endif
323 	wd->atabus = adev->adev_bustype;
324 	wd->drvp = adev->adev_drv_data;
325 
326 	wd->drvp->drv_openings = 1;
327 	wd->drvp->drv_start = wdstart;
328 	wd->drvp->drv_done = wddone;
329 	wd->drvp->drv_softc = dksc->sc_dev; /* done in atabusconfig_thread()
330 					     but too late */
331 
332 	aprint_naive("\n");
333 	aprint_normal("\n");
334 
335 	/* read our drive info */
336 	if (wd_get_params(wd, AT_WAIT, &wd->sc_params) != 0) {
337 		aprint_error_dev(self, "IDENTIFY failed\n");
338 		goto out;
339 	}
340 
341 	for (blank = 0, p = wd->sc_params.atap_model, q = tbuf, i = 0;
342 	    i < sizeof(wd->sc_params.atap_model); i++) {
343 		c = *p++;
344 		if (c == '\0')
345 			break;
346 		if (c != ' ') {
347 			if (blank) {
348 				*q++ = ' ';
349 				blank = 0;
350 			}
351 			*q++ = c;
352 		} else
353 			blank = 1;
354 	}
355 	*q++ = '\0';
356 
357 	aprint_normal_dev(self, "<%s>\n", tbuf);
358 
359 	wdq = wd_lookup_quirks(tbuf);
360 	if (wdq != NULL)
361 		wd->sc_quirks = wdq->wdq_quirks;
362 
363 	if (wd->sc_quirks != 0) {
364 		char sbuf[sizeof(WD_QUIRK_FMT) + 64];
365 		snprintb(sbuf, sizeof(sbuf), WD_QUIRK_FMT, wd->sc_quirks);
366 		aprint_normal_dev(self, "quirks %s\n", sbuf);
367 
368 		if (wd->sc_quirks & WD_QUIRK_SPLIT_MOD15_WRITE) {
369 			aprint_error_dev(self, "drive corrupts write transfers with certain controllers, consider replacing\n");
370 		}
371 	}
372 
373 	if ((wd->sc_params.atap_multi & 0xff) > 1) {
374 		wd->drvp->multi = wd->sc_params.atap_multi & 0xff;
375 	} else {
376 		wd->drvp->multi = 1;
377 	}
378 
379 	aprint_verbose_dev(self, "drive supports %d-sector PIO transfers,",
380 	    wd->drvp->multi);
381 
382 	/* 48-bit LBA addressing */
383 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0)
384 		wd->sc_flags |= WDF_LBA48;
385 
386 	/* Prior to ATA-4, LBA was optional. */
387 	if ((wd->sc_params.atap_capabilities1 & WDC_CAP_LBA) != 0)
388 		wd->sc_flags |= WDF_LBA;
389 #if 0
390 	/* ATA-4 requires LBA. */
391 	if (wd->sc_params.atap_ataversion != 0xffff &&
392 	    wd->sc_params.atap_ataversion >= WDC_VER_ATA4)
393 		wd->sc_flags |= WDF_LBA;
394 #endif
395 
396 	if ((wd->sc_flags & WDF_LBA48) != 0) {
397 		aprint_verbose(" LBA48 addressing\n");
398 		wd->sc_capacity =
399 		    ((uint64_t) wd->sc_params.atap_max_lba[3] << 48) |
400 		    ((uint64_t) wd->sc_params.atap_max_lba[2] << 32) |
401 		    ((uint64_t) wd->sc_params.atap_max_lba[1] << 16) |
402 		    ((uint64_t) wd->sc_params.atap_max_lba[0] <<  0);
403 		wd->sc_capacity28 =
404 		    (wd->sc_params.atap_capacity[1] << 16) |
405 		    wd->sc_params.atap_capacity[0];
406 	} else if ((wd->sc_flags & WDF_LBA) != 0) {
407 		aprint_verbose(" LBA addressing\n");
408 		wd->sc_capacity28 = wd->sc_capacity =
409 		    (wd->sc_params.atap_capacity[1] << 16) |
410 		    wd->sc_params.atap_capacity[0];
411 	} else {
412 		aprint_verbose(" chs addressing\n");
413 		wd->sc_capacity28 = wd->sc_capacity =
414 		    wd->sc_params.atap_cylinders *
415 		    wd->sc_params.atap_heads *
416 		    wd->sc_params.atap_sectors;
417 	}
418 	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
419 	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LLS) != 0)) {
420 		wd->sc_blksize = 2ULL *
421 		    ((uint32_t)((wd->sc_params.atap_lls_secsz[1] << 16) |
422 		    wd->sc_params.atap_lls_secsz[0]));
423 	} else {
424 		wd->sc_blksize = 512;
425 	}
426 	wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
427 	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
428 	aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
429 	    "%d bytes/sect x %llu sectors\n",
430 	    pbuf,
431 	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
432 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
433 		wd->sc_params.atap_cylinders,
434 	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
435 	    wd->sc_blksize, (unsigned long long)wd->sc_capacity);
436 
437 	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
438 	    device_xname(self), wd->sc_params.atap_dmatiming_mimi,
439 	    wd->sc_params.atap_dmatiming_recom), DEBUG_PROBE);
440 
441 	if (wd->sc_blksize <= 0 || !powerof2(wd->sc_blksize) ||
442 	    wd->sc_blksize < DEV_BSIZE || wd->sc_blksize > MAXPHYS) {
443 		aprint_normal_dev(self, "WARNING: block size %u "
444 		    "might not actually work\n", wd->sc_blksize);
445 	}
446 
447 	if (strcmp(wd->sc_params.atap_model, "ST506") == 0)
448 		dtype = DKTYPE_ST506;
449 	else
450 		dtype = DKTYPE_ESDI;
451 
452 out:
453 	/*
454 	 * Initialize and attach the disk structure.
455 	 */
456 	dk_init(dksc, self, dtype);
457 	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &wddkdriver);
458 
459 	/* Attach dk and disk subsystems */
460 	dk_attach(dksc);
461 	disk_attach(&dksc->sc_dkdev);
462 	wd_set_geometry(wd);
463 
464 	bufq_alloc(&dksc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
465 
466 	/* reference to label structure, used by ata code */
467 	wd->drvp->lp = dksc->sc_dkdev.dk_label;
468 
469 	/* Discover wedges on this disk. */
470 	dkwedge_discover(&dksc->sc_dkdev);
471 
472 	if (!pmf_device_register1(self, wd_suspend, NULL, wd_shutdown))
473 		aprint_error_dev(self, "couldn't establish power handler\n");
474 
475 	wd_sysctl_attach(wd);
476 }
477 
478 static bool
479 wd_suspend(device_t dv, const pmf_qual_t *qual)
480 {
481 	struct wd_softc *sc = device_private(dv);
482 
483 	/* the adapter needs to be enabled */
484 	if (sc->atabus->ata_addref(sc->drvp))
485 		return true; /* no need to complain */
486 
487 	wd_flushcache(sc, AT_WAIT, false);
488 	wd_standby(sc, AT_WAIT);
489 
490 	sc->atabus->ata_delref(sc->drvp);
491 	return true;
492 }
493 
494 static int
495 wddetach(device_t self, int flags)
496 {
497 	struct wd_softc *wd = device_private(self);
498 	struct dk_softc *dksc = &wd->sc_dksc;
499 	int bmaj, cmaj, i, mn, rc;
500 
501 	if ((rc = disk_begindetach(&dksc->sc_dkdev, wd_lastclose, self, flags)) != 0)
502 		return rc;
503 
504 	/* locate the major number */
505 	bmaj = bdevsw_lookup_major(&wd_bdevsw);
506 	cmaj = cdevsw_lookup_major(&wd_cdevsw);
507 
508 	/* Nuke the vnodes for any open instances. */
509 	for (i = 0; i < MAXPARTITIONS; i++) {
510 		mn = WDMINOR(device_unit(self), i);
511 		vdevgone(bmaj, mn, mn, VBLK);
512 		vdevgone(cmaj, mn, mn, VCHR);
513 	}
514 
515 	dk_drain(dksc);
516 
517 	/* Kill off any pending commands. */
518 	mutex_enter(&wd->sc_lock);
519 	wd->atabus->ata_killpending(wd->drvp);
520 	mutex_exit(&wd->sc_lock);
521 
522 	bufq_free(dksc->sc_bufq);
523 
524 	if (flags & DETACH_POWEROFF)
525 		wd_standby(wd, AT_POLL);
526 
527 	/* Delete all of our wedges. */
528 	dkwedge_delall(&dksc->sc_dkdev);
529 
530 	/* Detach from the disk list. */
531 	disk_detach(&dksc->sc_dkdev);
532 	disk_destroy(&dksc->sc_dkdev);
533 
534 	dk_detach(dksc);
535 
536 #ifdef WD_SOFTBADSECT
537 	/* Clean out the bad sector list */
538 	while (!SLIST_EMPTY(&wd->sc_bslist)) {
539 		void *head = SLIST_FIRST(&wd->sc_bslist);
540 		SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
541 		free(head, M_TEMP);
542 	}
543 	wd->sc_bscount = 0;
544 #endif
545 
546 	pmf_device_deregister(self);
547 
548 	wd_sysctl_detach(wd);
549 
550 	mutex_destroy(&wd->sc_lock);
551 
552 	wd->drvp->drive_type = ATA_DRIVET_NONE; /* no drive any more here */
553 	wd->drvp->drive_flags = 0;
554 
555 	return (0);
556 }
557 
558 /*
559  * Read/write routine for a buffer.  Validates the arguments and schedules the
560  * transfer.  Does not wait for the transfer to complete.
561  */
562 static void
563 wdstrategy(struct buf *bp)
564 {
565 	struct wd_softc *wd =
566 	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
567 	struct dk_softc *dksc = &wd->sc_dksc;
568 
569 	ATADEBUG_PRINT(("wdstrategy (%s)\n", dksc->sc_xname),
570 	    DEBUG_XFERS);
571 
572 	/* If device invalidated (e.g. media change, door open,
573 	 * device detachment), then error.
574 	 */
575 	if ((wd->sc_flags & WDF_LOADED) == 0 ||
576 	    !device_is_enabled(dksc->sc_dev))
577 		goto err;
578 
579 #ifdef WD_SOFTBADSECT
580 	/*
581 	 * If the transfer about to be attempted contains only a block that
582 	 * is known to be bad then return an error for the transfer without
583 	 * even attempting to start a transfer up under the premis that we
584 	 * will just end up doing more retries for a transfer that will end
585 	 * up failing again.
586 	 */
587 	if (__predict_false(!SLIST_EMPTY(&wd->sc_bslist))) {
588 		struct disklabel *lp = dksc->sc_dkdev.dk_label;
589 		struct disk_badsectors *dbs;
590 		daddr_t blkno, maxblk;
591 
592 		/* convert the block number to absolute */
593 		if (lp->d_secsize >= DEV_BSIZE)
594 			blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
595 		else
596 			blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
597 		if (WDPART(bp->b_dev) != RAW_PART)
598 			blkno += lp->d_partitions[WDPART(bp->b_dev)].p_offset;
599 		maxblk = blkno + (bp->b_bcount / wd->sc_blksize) - 1;
600 
601 		mutex_enter(&wd->sc_lock);
602 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next)
603 			if ((dbs->dbs_min <= bp->b_rawblkno &&
604 			     bp->b_rawblkno <= dbs->dbs_max) ||
605 			    (dbs->dbs_min <= maxblk && maxblk <= dbs->dbs_max)){
606 				mutex_exit(&wd->sc_lock);
607 				goto err;
608 			}
609 		mutex_exit(&wd->sc_lock);
610 	}
611 #endif
612 
613 	dk_strategy(dksc, bp);
614 	return;
615 
616 err:
617 	bp->b_error = EIO;
618 	bp->b_resid = bp->b_bcount;
619 	biodone(bp);
620 }
621 
622 static void
623 wdstart1(struct wd_softc *wd, struct buf *bp, struct ata_xfer *xfer)
624 {
625 	struct dk_softc *dksc = &wd->sc_dksc;
626 
627 	KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL);
628 	KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0);
629 
630 	/* Reset state, so that retries don't use stale info */
631 	if (__predict_false(xfer->c_retries > 0)) {
632 		xfer->c_flags = 0;
633 		memset(&xfer->c_bio, 0, sizeof(xfer->c_bio));
634 	}
635 
636 	xfer->c_bio.blkno = bp->b_rawblkno;
637 	xfer->c_bio.bcount = bp->b_bcount;
638 	xfer->c_bio.databuf = bp->b_data;
639 	xfer->c_bio.blkdone = 0;
640 	xfer->c_bio.bp = bp;
641 
642 #ifdef WD_CHAOS_MONKEY
643 	/*
644 	 * Override blkno to be over device capacity to trigger error,
645 	 * but only if it's read, to avoid trashing disk contents should
646 	 * the command be clipped, or otherwise misinterpreted, by the
647 	 * driver or controller.
648 	 */
649 	if (BUF_ISREAD(bp) && xfer->c_retries == 0 && wd->drv_chaos_freq > 0 &&
650 	    (++wd->drv_chaos_cnt % wd->drv_chaos_freq) == 0) {
651 		aprint_normal_dev(dksc->sc_dev, "%s: chaos xfer %d\n",
652 		    __func__, xfer->c_slot);
653 		xfer->c_bio.blkno = 7777777 + wd->sc_capacity;
654 		xfer->c_flags |= C_CHAOS;
655 	}
656 #endif
657 
658 	/*
659 	 * If we're retrying, retry in single-sector mode. This will give us
660 	 * the sector number of the problem, and will eventually allow the
661 	 * transfer to succeed. If FUA is requested, we can't actually
662 	 * do this, as ATA_SINGLE is usually executed as PIO transfer by drivers
663 	 * which support it, and that isn't compatible with NCQ/FUA.
664 	 */
665 	if (xfer->c_retries >= WDIORETRIES_SINGLE &&
666 	    (bp->b_flags & B_MEDIA_FUA) == 0)
667 		xfer->c_bio.flags = ATA_SINGLE;
668 	else
669 		xfer->c_bio.flags = 0;
670 
671 	/*
672 	 * request LBA48 transfers when supported by the controller
673 	 * and needed by transfer offset or size.
674 	 */
675 	if (wd->sc_flags & WDF_LBA48 &&
676 	    (((xfer->c_bio.blkno +
677 	     xfer->c_bio.bcount / dksc->sc_dkdev.dk_geom.dg_secsize) >
678 	    wd->sc_capacity28) ||
679 	    ((xfer->c_bio.bcount / dksc->sc_dkdev.dk_geom.dg_secsize) > 128)))
680 		xfer->c_bio.flags |= ATA_LBA48;
681 
682 	/*
683 	 * If NCQ was negotiated, always use it for the first several attempts.
684 	 * Since device cancels all outstanding requests on error, downgrade
685 	 * to non-NCQ on retry, so that the retried transfer would not cause
686 	 * cascade failure for the other transfers if it fails again.
687 	 * If FUA was requested, we can't downgrade, as that would violate
688 	 * the semantics - FUA would not be honored. In that case, continue
689 	 * retrying with NCQ.
690 	 */
691 	if (WD_USE_NCQ(wd) && (xfer->c_retries < WDIORETRIES_SINGLE ||
692 	    (bp->b_flags & B_MEDIA_FUA) != 0)) {
693 		xfer->c_bio.flags |= ATA_LBA48;
694 		xfer->c_flags |= C_NCQ;
695 
696 		if (WD_USE_NCQ_PRIO(wd) &&
697 		    BIO_GETPRIO(bp) == BPRIO_TIMECRITICAL)
698 			xfer->c_bio.flags |= ATA_PRIO_HIGH;
699 	}
700 
701 	if (wd->sc_flags & WDF_LBA)
702 		xfer->c_bio.flags |= ATA_LBA;
703 	if (bp->b_flags & B_READ)
704 		xfer->c_bio.flags |= ATA_READ;
705 	if (bp->b_flags & B_MEDIA_FUA) {
706 		/* If not using NCQ, the command WRITE DMA FUA EXT is LBA48 */
707 		KASSERT((wd->sc_flags & WDF_LBA48) != 0);
708 		if ((xfer->c_flags & C_NCQ) == 0)
709 			xfer->c_bio.flags |= ATA_LBA48;
710 
711 		xfer->c_bio.flags |= ATA_FUA;
712 	}
713 
714 	switch (wd->atabus->ata_bio(wd->drvp, xfer)) {
715 	case ATACMD_TRY_AGAIN:
716 		panic("wdstart1: try again");
717 		break;
718 	case ATACMD_QUEUED:
719 	case ATACMD_COMPLETE:
720 		break;
721 	default:
722 		panic("wdstart1: bad return code from ata_bio()");
723 	}
724 }
725 
726 static int
727 wd_diskstart(device_t dev, struct buf *bp)
728 {
729 	struct wd_softc *wd = device_private(dev);
730 #ifdef ATADEBUG
731 	struct dk_softc *dksc = &wd->sc_dksc;
732 #endif
733 	struct ata_xfer *xfer;
734 
735 	mutex_enter(&wd->sc_lock);
736 
737 	xfer = ata_get_xfer_ext(wd->drvp->chnl_softc, 0,
738 	    WD_USE_NCQ(wd) ? WD_MAX_OPENINGS(wd) : 0);
739 	if (xfer == NULL) {
740 		ATADEBUG_PRINT(("wd_diskstart %s no xfer\n",
741 		    dksc->sc_xname), DEBUG_XFERS);
742 		mutex_exit(&wd->sc_lock);
743 		return EAGAIN;
744 	}
745 
746 	wdstart1(wd, bp, xfer);
747 
748 	mutex_exit(&wd->sc_lock);
749 
750 	return 0;
751 }
752 
753 /*
754  * Queue a drive for I/O.
755  */
756 static void
757 wdstart(device_t self)
758 {
759 	struct wd_softc *wd = device_private(self);
760 	struct dk_softc *dksc = &wd->sc_dksc;
761 
762 	ATADEBUG_PRINT(("wdstart %s\n", dksc->sc_xname),
763 	    DEBUG_XFERS);
764 
765 	if (!device_is_active(dksc->sc_dev))
766 		return;
767 
768 	mutex_enter(&wd->sc_lock);
769 
770 	/*
771 	 * Do not queue any transfers until flush is finished, so that
772 	 * once flush is pending, it will get handled as soon as xfer
773 	 * is available.
774 	 */
775 	if (ISSET(wd->sc_flags, WDF_FLUSH_PEND)) {
776 		ATADEBUG_PRINT(("wdstart %s flush pend\n",
777 		    dksc->sc_xname), DEBUG_XFERS);
778 		mutex_exit(&wd->sc_lock);
779 		return;
780 	}
781 
782 	mutex_exit(&wd->sc_lock);
783 
784 	dk_start(dksc, NULL);
785 }
786 
787 static void
788 wddone(device_t self, struct ata_xfer *xfer)
789 {
790 	struct wd_softc *wd = device_private(self);
791 	struct dk_softc *dksc = &wd->sc_dksc;
792 	const char *errmsg;
793 	int do_perror = 0;
794 	struct buf *bp;
795 
796 	ATADEBUG_PRINT(("wddone %s\n", dksc->sc_xname),
797 	    DEBUG_XFERS);
798 
799 	if (__predict_false(wddoingadump)) {
800 		/* just drop it to the floor */
801 		ata_free_xfer(wd->drvp->chnl_softc, xfer);
802 		return;
803 	}
804 
805 	bp = xfer->c_bio.bp;
806 	KASSERT(bp != NULL);
807 
808 	bp->b_resid = xfer->c_bio.bcount;
809 	switch (xfer->c_bio.error) {
810 	case ERR_DMA:
811 		errmsg = "DMA error";
812 		goto retry;
813 	case ERR_DF:
814 		errmsg = "device fault";
815 		goto retry;
816 	case TIMEOUT:
817 		errmsg = "device timeout";
818 		goto retry;
819 	case REQUEUE:
820 		errmsg = "requeue";
821 		goto retry2;
822 	case ERR_RESET:
823 		errmsg = "channel reset";
824 		goto retry2;
825 	case ERROR:
826 		/* Don't care about media change bits */
827 		if (xfer->c_bio.r_error != 0 &&
828 		    (xfer->c_bio.r_error & ~(WDCE_MC | WDCE_MCR)) == 0)
829 			goto noerror;
830 		errmsg = "error";
831 		do_perror = 1;
832 retry:		/* Just reset and retry. Can we do more ? */
833 		if ((xfer->c_flags & C_RECOVERED) == 0) {
834 			int wflags = (xfer->c_flags & C_POLL) ? AT_POLL : 0;
835 			(*wd->atabus->ata_reset_drive)(wd->drvp, wflags, NULL);
836 		}
837 retry2:
838 		mutex_enter(&wd->sc_lock);
839 
840 		diskerr(bp, "wd", errmsg, LOG_PRINTF,
841 		    xfer->c_bio.blkdone, dksc->sc_dkdev.dk_label);
842 		if (xfer->c_retries < WDIORETRIES)
843 			printf(", slot %d, retry %d", xfer->c_slot,
844 			    xfer->c_retries + 1);
845 		printf("\n");
846 		if (do_perror)
847 			wdperror(wd, xfer);
848 
849 		if (xfer->c_retries < WDIORETRIES) {
850 			xfer->c_retries++;
851 
852 			/* Rerun ASAP if just requeued */
853 			callout_reset(&xfer->c_retry_callout,
854 			    (xfer->c_bio.error == REQUEUE) ? 1 : RECOVERYTIME,
855 			    wdbiorestart, xfer);
856 
857 			mutex_exit(&wd->sc_lock);
858 			return;
859 		}
860 
861 		mutex_exit(&wd->sc_lock);
862 
863 #ifdef WD_SOFTBADSECT
864 		/*
865 		 * Not all errors indicate a failed block but those that do,
866 		 * put the block on the bad-block list for the device.  Only
867 		 * do this for reads because the drive should do it for writes,
868 		 * itself, according to Manuel.
869 		 */
870 		if ((bp->b_flags & B_READ) &&
871 		    ((wd->drvp->ata_vers >= 4 && xfer->c_bio.r_error & 64) ||
872 		     (wd->drvp->ata_vers < 4 && xfer->c_bio.r_error & 192))) {
873 			struct disk_badsectors *dbs;
874 
875 			dbs = malloc(sizeof *dbs, M_TEMP, M_NOWAIT);
876 			if (dbs == NULL) {
877 				aprint_error_dev(dksc->sc_dev,
878 				    "failed to add bad block to list\n");
879 				goto out;
880 			}
881 
882 			dbs->dbs_min = bp->b_rawblkno;
883 			dbs->dbs_max = dbs->dbs_min +
884 			    (bp->b_bcount /wd->sc_blksize) - 1;
885 			microtime(&dbs->dbs_failedat);
886 
887 			mutex_enter(&wd->sc_lock);
888 			SLIST_INSERT_HEAD(&wd->sc_bslist, dbs, dbs_next);
889 			wd->sc_bscount++;
890 			mutex_exit(&wd->sc_lock);
891 		}
892 out:
893 #endif
894 		bp->b_error = EIO;
895 		break;
896 	case NOERROR:
897 noerror:	if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_retries > 0)
898 			aprint_error_dev(dksc->sc_dev,
899 			    "soft error (corrected) slot %d\n", xfer->c_slot);
900 #ifdef WD_CHAOS_MONKEY
901 		KASSERT((xfer->c_flags & C_CHAOS) == 0);
902 #endif
903 		break;
904 	case ERR_NODEV:
905 		bp->b_error = EIO;
906 		break;
907 	}
908 	if (__predict_false(bp->b_error != 0) && bp->b_resid == 0) {
909 		/*
910 		 * the disk or controller sometimes report a complete
911 		 * xfer, when there has been an error. This is wrong,
912 		 * assume nothing got transfered in this case
913 		 */
914 		bp->b_resid = bp->b_bcount;
915 	}
916 
917 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
918 
919 	dk_done(dksc, bp);
920 	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
921 }
922 
923 static void
924 wdbiorestart(void *v)
925 {
926 	struct ata_xfer *xfer = v;
927 	struct buf *bp = xfer->c_bio.bp;
928 	struct wd_softc *wd = device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
929 #ifdef ATADEBUG
930 	struct dk_softc *dksc = &wd->sc_dksc;
931 #endif
932 
933 	ATADEBUG_PRINT(("wdbiorestart %s\n", dksc->sc_xname),
934 	    DEBUG_XFERS);
935 
936 	mutex_enter(&wd->sc_lock);
937 	wdstart1(wd, bp, xfer);
938 	mutex_exit(&wd->sc_lock);
939 }
940 
941 static void
942 wdminphys(struct buf *bp)
943 {
944 	const struct wd_softc * const wd =
945 	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
946 	int maxsectors;
947 
948 	/*
949 	 * The limit is actually 65536 for LBA48 and 256 for non-LBA48,
950 	 * but that requires to set the count for the ATA command
951 	 * to 0, which is somewhat error prone, so better stay safe.
952 	 */
953 	if (wd->sc_flags & WDF_LBA48)
954 		maxsectors = 65535;
955 	else
956 		maxsectors = 128;
957 
958 	if (bp->b_bcount > (wd->sc_blksize * maxsectors))
959 		bp->b_bcount = (wd->sc_blksize * maxsectors);
960 
961 	minphys(bp);
962 }
963 
964 static void
965 wd_iosize(device_t dev, int *count)
966 {
967 	struct buf B;
968 	int bmaj;
969 
970 	bmaj       = bdevsw_lookup_major(&wd_bdevsw);
971 	B.b_dev    = MAKEWDDEV(bmaj,device_unit(dev),RAW_PART);
972 	B.b_bcount = *count;
973 
974 	wdminphys(&B);
975 
976 	*count = B.b_bcount;
977 }
978 
979 static int
980 wdread(dev_t dev, struct uio *uio, int flags)
981 {
982 
983 	ATADEBUG_PRINT(("wdread\n"), DEBUG_XFERS);
984 	return (physio(wdstrategy, NULL, dev, B_READ, wdminphys, uio));
985 }
986 
987 static int
988 wdwrite(dev_t dev, struct uio *uio, int flags)
989 {
990 
991 	ATADEBUG_PRINT(("wdwrite\n"), DEBUG_XFERS);
992 	return (physio(wdstrategy, NULL, dev, B_WRITE, wdminphys, uio));
993 }
994 
995 static int
996 wdopen(dev_t dev, int flag, int fmt, struct lwp *l)
997 {
998 	struct wd_softc *wd;
999 	struct dk_softc *dksc;
1000 	int unit, part, error;
1001 
1002 	ATADEBUG_PRINT(("wdopen\n"), DEBUG_FUNCS);
1003 	unit = WDUNIT(dev);
1004 	wd = device_lookup_private(&wd_cd, unit);
1005 	if (wd == NULL)
1006 		return (ENXIO);
1007 	dksc = &wd->sc_dksc;
1008 
1009 	if (! device_is_active(dksc->sc_dev))
1010 		return (ENODEV);
1011 
1012 	part = WDPART(dev);
1013 
1014 	if (wd->sc_capacity == 0)
1015 		return (ENODEV);
1016 
1017 	/*
1018 	 * If any partition is open, but the disk has been invalidated,
1019 	 * disallow further opens.
1020 	 */
1021 	if ((wd->sc_flags & (WDF_OPEN | WDF_LOADED)) == WDF_OPEN) {
1022 		if (part != RAW_PART || fmt != S_IFCHR)
1023 			return EIO;
1024 	}
1025 
1026 	error = dk_open(dksc, dev, flag, fmt, l);
1027 
1028 	return error;
1029 }
1030 
1031 /*
1032  * Serialized by caller
1033  */
1034 static int
1035 wd_firstopen(device_t self, dev_t dev, int flag, int fmt)
1036 {
1037 	struct wd_softc *wd = device_private(self);
1038 	struct dk_softc *dksc = &wd->sc_dksc;
1039 	int error;
1040 
1041 	error = wd->atabus->ata_addref(wd->drvp);
1042 	if (error)
1043 		return error;
1044 
1045 	if ((wd->sc_flags & WDF_LOADED) == 0) {
1046 		int param_error;
1047 
1048 		/* Load the physical device parameters. */
1049 		param_error = wd_get_params(wd, AT_WAIT, &wd->sc_params);
1050 		if (param_error != 0) {
1051 			aprint_error_dev(dksc->sc_dev, "IDENTIFY failed\n");
1052 			error = EIO;
1053 			goto bad;
1054 		}
1055 		wd_set_geometry(wd);
1056 		wd->sc_flags |= WDF_LOADED;
1057 	}
1058 
1059 	wd->sc_flags |= WDF_OPEN;
1060 	return 0;
1061 
1062 bad:
1063 	wd->atabus->ata_delref(wd->drvp);
1064 	return error;
1065 }
1066 
1067 /*
1068  * Caller must hold wd->sc_dk.dk_openlock.
1069  */
1070 static int
1071 wd_lastclose(device_t self)
1072 {
1073 	struct wd_softc *wd = device_private(self);
1074 
1075 	wd_flushcache(wd, AT_WAIT, false);
1076 
1077 	wd->atabus->ata_delref(wd->drvp);
1078 	wd->sc_flags &= ~WDF_OPEN;
1079 
1080 	return 0;
1081 }
1082 
1083 static int
1084 wdclose(dev_t dev, int flag, int fmt, struct lwp *l)
1085 {
1086 	struct wd_softc *wd;
1087 	struct dk_softc *dksc;
1088 	int unit;
1089 
1090 	unit = WDUNIT(dev);
1091 	wd = device_lookup_private(&wd_cd, unit);
1092 	dksc = &wd->sc_dksc;
1093 
1094 	return dk_close(dksc, dev, flag, fmt, l);
1095 }
1096 
1097 void
1098 wdperror(const struct wd_softc *wd, struct ata_xfer *xfer)
1099 {
1100 	static const char *const errstr0_3[] = {"address mark not found",
1101 	    "track 0 not found", "aborted command", "media change requested",
1102 	    "id not found", "media changed", "uncorrectable data error",
1103 	    "bad block detected"};
1104 	static const char *const errstr4_5[] = {
1105 	    "obsolete (address mark not found)",
1106 	    "no media/write protected", "aborted command",
1107 	    "media change requested", "id not found", "media changed",
1108 	    "uncorrectable data error", "interface CRC error"};
1109 	const char *const *errstr;
1110 	int i;
1111 	const char *sep = "";
1112 
1113 	const struct dk_softc *dksc = &wd->sc_dksc;
1114 	const char *devname = dksc->sc_xname;
1115 	struct ata_drive_datas *drvp = wd->drvp;
1116 	int errno = xfer->c_bio.r_error;
1117 
1118 	if (drvp->ata_vers >= 4)
1119 		errstr = errstr4_5;
1120 	else
1121 		errstr = errstr0_3;
1122 
1123 	printf("%s: (", devname);
1124 
1125 	if (errno == 0)
1126 		printf("error not notified");
1127 
1128 	for (i = 0; i < 8; i++) {
1129 		if (errno & (1 << i)) {
1130 			printf("%s%s", sep, errstr[i]);
1131 			sep = ", ";
1132 		}
1133 	}
1134 	printf(")\n");
1135 }
1136 
1137 int
1138 wdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
1139 {
1140 	struct wd_softc *wd =
1141 	    device_lookup_private(&wd_cd, WDUNIT(dev));
1142 	struct dk_softc *dksc = &wd->sc_dksc;
1143 
1144 	ATADEBUG_PRINT(("wdioctl\n"), DEBUG_FUNCS);
1145 
1146 	if ((wd->sc_flags & WDF_LOADED) == 0)
1147 		return EIO;
1148 
1149 	switch (cmd) {
1150 #ifdef HAS_BAD144_HANDLING
1151 	case DIOCSBAD:
1152 		if ((flag & FWRITE) == 0)
1153 			return EBADF;
1154 		dksc->sc_dkdev.dk_cpulabel->bad = *(struct dkbad *)addr;
1155 		dksc->sc_dkdev.dk_label->d_flags |= D_BADSECT;
1156 		bad144intern(wd);
1157 		return 0;
1158 #endif
1159 #ifdef WD_SOFTBADSECT
1160 	case DIOCBSLIST :
1161 	{
1162 		uint32_t count, missing, skip;
1163 		struct disk_badsecinfo dbsi;
1164 		struct disk_badsectors *dbs;
1165 		size_t available;
1166 		uint8_t *laddr;
1167 
1168 		dbsi = *(struct disk_badsecinfo *)addr;
1169 		missing = wd->sc_bscount;
1170 		count = 0;
1171 		available = dbsi.dbsi_bufsize;
1172 		skip = dbsi.dbsi_skip;
1173 		laddr = (uint8_t *)dbsi.dbsi_buffer;
1174 
1175 		/*
1176 		 * We start this loop with the expectation that all of the
1177 		 * entries will be missed and decrement this counter each
1178 		 * time we either skip over one (already copied out) or
1179 		 * we actually copy it back to user space.  The structs
1180 		 * holding the bad sector information are copied directly
1181 		 * back to user space whilst the summary is returned via
1182 		 * the struct passed in via the ioctl.
1183 		 */
1184 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next) {
1185 			if (skip > 0) {
1186 				missing--;
1187 				skip--;
1188 				continue;
1189 			}
1190 			if (available < sizeof(*dbs))
1191 				break;
1192 			available -= sizeof(*dbs);
1193 			copyout(dbs, laddr, sizeof(*dbs));
1194 			laddr += sizeof(*dbs);
1195 			missing--;
1196 			count++;
1197 		}
1198 		dbsi.dbsi_left = missing;
1199 		dbsi.dbsi_copied = count;
1200 		*(struct disk_badsecinfo *)addr = dbsi;
1201 		return 0;
1202 	}
1203 
1204 	case DIOCBSFLUSH :
1205 		/* Clean out the bad sector list */
1206 		while (!SLIST_EMPTY(&wd->sc_bslist)) {
1207 			void *head = SLIST_FIRST(&wd->sc_bslist);
1208 			SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
1209 			free(head, M_TEMP);
1210 		}
1211 		wd->sc_bscount = 0;
1212 		return 0;
1213 #endif
1214 
1215 #ifdef notyet
1216 	case DIOCWFORMAT:
1217 		if ((flag & FWRITE) == 0)
1218 			return EBADF;
1219 		{
1220 		register struct format_op *fop;
1221 		struct iovec aiov;
1222 		struct uio auio;
1223 		int error1;
1224 
1225 		fop = (struct format_op *)addr;
1226 		aiov.iov_base = fop->df_buf;
1227 		aiov.iov_len = fop->df_count;
1228 		auio.uio_iov = &aiov;
1229 		auio.uio_iovcnt = 1;
1230 		auio.uio_resid = fop->df_count;
1231 		auio.uio_offset =
1232 			fop->df_startblk * wd->sc_dk.dk_label->d_secsize;
1233 		auio.uio_vmspace = l->l_proc->p_vmspace;
1234 		error1 = physio(wdformat, NULL, dev, B_WRITE, wdminphys,
1235 		    &auio);
1236 		fop->df_count -= auio.uio_resid;
1237 		fop->df_reg[0] = wdc->sc_status;
1238 		fop->df_reg[1] = wdc->sc_error;
1239 		return error1;
1240 		}
1241 #endif
1242 	case DIOCGCACHE:
1243 		return wd_getcache(wd, (int *)addr);
1244 
1245 	case DIOCSCACHE:
1246 		return wd_setcache(wd, *(int *)addr);
1247 
1248 	case DIOCCACHESYNC:
1249 		return wd_flushcache(wd, AT_WAIT, true);
1250 
1251 	case ATAIOCCOMMAND:
1252 		/*
1253 		 * Make sure this command is (relatively) safe first
1254 		 */
1255 		if ((((atareq_t *) addr)->flags & ATACMD_READ) == 0 &&
1256 		    (flag & FWRITE) == 0)
1257 			return (EBADF);
1258 		{
1259 		struct wd_ioctl *wi;
1260 		atareq_t *atareq = (atareq_t *) addr;
1261 		int error1;
1262 
1263 		wi = wi_get(wd);
1264 		wi->wi_atareq = *atareq;
1265 
1266 		if (atareq->datalen && atareq->flags &
1267 		    (ATACMD_READ | ATACMD_WRITE)) {
1268 			void *tbuf;
1269 			if (atareq->datalen < DEV_BSIZE
1270 			    && atareq->command == WDCC_IDENTIFY) {
1271 				tbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
1272 				wi->wi_iov.iov_base = tbuf;
1273 				wi->wi_iov.iov_len = DEV_BSIZE;
1274 				UIO_SETUP_SYSSPACE(&wi->wi_uio);
1275 			} else {
1276 				tbuf = NULL;
1277 				wi->wi_iov.iov_base = atareq->databuf;
1278 				wi->wi_iov.iov_len = atareq->datalen;
1279 				wi->wi_uio.uio_vmspace = l->l_proc->p_vmspace;
1280 			}
1281 			wi->wi_uio.uio_iov = &wi->wi_iov;
1282 			wi->wi_uio.uio_iovcnt = 1;
1283 			wi->wi_uio.uio_resid = atareq->datalen;
1284 			wi->wi_uio.uio_offset = 0;
1285 			wi->wi_uio.uio_rw =
1286 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE;
1287 			error1 = physio(wdioctlstrategy, &wi->wi_bp, dev,
1288 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE,
1289 			    wdminphys, &wi->wi_uio);
1290 			if (tbuf != NULL && error1 == 0) {
1291 				error1 = copyout(tbuf, atareq->databuf,
1292 				    atareq->datalen);
1293 				free(tbuf, M_TEMP);
1294 			}
1295 		} else {
1296 			/* No need to call physio if we don't have any
1297 			   user data */
1298 			wi->wi_bp.b_flags = 0;
1299 			wi->wi_bp.b_data = 0;
1300 			wi->wi_bp.b_bcount = 0;
1301 			wi->wi_bp.b_dev = dev;
1302 			wi->wi_bp.b_proc = l->l_proc;
1303 			wdioctlstrategy(&wi->wi_bp);
1304 			error1 = wi->wi_bp.b_error;
1305 		}
1306 		*atareq = wi->wi_atareq;
1307 		wi_free(wi);
1308 		return(error1);
1309 		}
1310 
1311 	default:
1312 		return dk_ioctl(dksc, dev, cmd, addr, flag, l);
1313 	}
1314 
1315 #ifdef DIAGNOSTIC
1316 	panic("wdioctl: impossible");
1317 #endif
1318 }
1319 
1320 static int
1321 wd_discard(device_t dev, off_t pos, off_t len)
1322 {
1323 	struct wd_softc *wd = device_private(dev);
1324 	daddr_t bno;
1325 	long size, done;
1326 	long maxatonce, amount;
1327 	int result;
1328 
1329 	if (!(wd->sc_params.atap_ata_major & WDC_VER_ATA7)
1330 	    || !(wd->sc_params.support_dsm & ATA_SUPPORT_DSM_TRIM)) {
1331 		/* not supported; ignore request */
1332 		ATADEBUG_PRINT(("wddiscard (unsupported)\n"), DEBUG_FUNCS);
1333 		return 0;
1334 	}
1335 	maxatonce = 0xffff; /*wd->sc_params.max_dsm_blocks*/
1336 
1337 	ATADEBUG_PRINT(("wddiscard\n"), DEBUG_FUNCS);
1338 
1339 	if ((wd->sc_flags & WDF_LOADED) == 0)
1340 		return EIO;
1341 
1342 	/* round the start up and the end down */
1343 	bno = (pos + wd->sc_blksize - 1) / wd->sc_blksize;
1344 	size = ((pos + len) / wd->sc_blksize) - bno;
1345 
1346 	done = 0;
1347 	while (done < size) {
1348 	     amount = size - done;
1349 	     if (amount > maxatonce) {
1350 		     amount = maxatonce;
1351 	     }
1352 	     result = wd_trim(wd, bno + done, amount);
1353 	     if (result) {
1354 		     return result;
1355 	     }
1356 	     done += amount;
1357 	}
1358 	return 0;
1359 }
1360 
1361 static int
1362 wddiscard(dev_t dev, off_t pos, off_t len)
1363 {
1364 	struct wd_softc *wd;
1365 	struct dk_softc *dksc;
1366 	int unit;
1367 
1368 	unit = WDUNIT(dev);
1369 	wd = device_lookup_private(&wd_cd, unit);
1370 	dksc = &wd->sc_dksc;
1371 
1372 	return dk_discard(dksc, dev, pos, len);
1373 }
1374 
#ifdef B_FORMAT
/*
 * Strategy wrapper used for formatting: tag the buffer with B_FORMAT
 * and pass it to wdstrategy().  Only built when B_FORMAT is defined.
 */
int
wdformat(struct buf *bp)
{

	bp->b_flags = bp->b_flags | B_FORMAT;
	return wdstrategy(bp);
}
#endif
1384 
1385 int
1386 wdsize(dev_t dev)
1387 {
1388 	struct wd_softc *wd;
1389 	struct dk_softc *dksc;
1390 	int unit;
1391 
1392 	ATADEBUG_PRINT(("wdsize\n"), DEBUG_FUNCS);
1393 
1394 	unit = WDUNIT(dev);
1395 	wd = device_lookup_private(&wd_cd, unit);
1396 	if (wd == NULL)
1397 		return (-1);
1398 	dksc = &wd->sc_dksc;
1399 
1400 	if (!device_is_active(dksc->sc_dev))
1401 		return (-1);
1402 
1403 	return dk_size(dksc, dev);
1404 }
1405 
1406 /*
1407  * Dump core after a system crash.
1408  */
1409 static int
1410 wddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1411 {
1412 	struct wd_softc *wd;
1413 	struct dk_softc *dksc;
1414 	int unit;
1415 
1416 	/* Check if recursive dump; if so, punt. */
1417 	if (wddoingadump)
1418 		return EFAULT;
1419 	wddoingadump = 1;
1420 
1421 	unit = WDUNIT(dev);
1422 	wd = device_lookup_private(&wd_cd, unit);
1423 	if (wd == NULL)
1424 		return (ENXIO);
1425 	dksc = &wd->sc_dksc;
1426 
1427 	return dk_dump(dksc, dev, blkno, va, size);
1428 }
1429 
/*
 * Write nblk blocks starting at blkno from the buffer va to the disk
 * using a polled ATA bio transfer.
 *
 * Returns 0 on success (and clears wddoingadump), EAGAIN when no xfer
 * could be obtained, or EIO when the transfer reported an error.
 * Unexpected return codes from the bus layer cause a panic.
 *
 * NOTE(review): the xfer obtained here is not released in this
 * function; presumably the completion path takes care of it -- confirm.
 */
static int
wd_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
{
	struct wd_softc *wd = device_private(dev);
	struct dk_softc *dksc = &wd->sc_dksc;
	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
	struct ata_xfer *xfer;
	int err;

	/* Recalibrate, if first dump transfer. */
	if (wddumprecalibrated == 0) {
		wddumprecalibrated = 1;
		(*wd->atabus->ata_reset_drive)(wd->drvp,
					       AT_POLL | AT_RST_EMERG, NULL);
		wd->drvp->state = RESET;
	}

	xfer = ata_get_xfer_ext(wd->drvp->chnl_softc, 0, 0);
	if (xfer == NULL) {
		printf("%s: no xfer\n", __func__);
		return EAGAIN;
	}

	xfer->c_bio.blkno = blkno;
	xfer->c_bio.flags = ATA_POLL;
	/* LBA48 is requested only when the range ends past sc_capacity28. */
	if (wd->sc_flags & WDF_LBA48 &&
	    (xfer->c_bio.blkno + nblk) > wd->sc_capacity28)
		xfer->c_bio.flags |= ATA_LBA48;
	if (wd->sc_flags & WDF_LBA)
		xfer->c_bio.flags |= ATA_LBA;
	xfer->c_bio.bcount = nblk * dg->dg_secsize;
	xfer->c_bio.databuf = va;
#ifndef WD_DUMP_NOT_TRUSTED
	/* Anything other than ATACMD_COMPLETE is fatal here. */
	switch (err = wd->atabus->ata_bio(wd->drvp, xfer)) {
	case ATACMD_TRY_AGAIN:
		panic("wddump: try again");
		break;
	case ATACMD_QUEUED:
		panic("wddump: polled command has been queued");
		break;
	case ATACMD_COMPLETE:
		break;
	default:
		panic("wddump: unknown atacmd code %d", err);
	}
	/*
	 * Translate the bio error class into an errno.  The messages
	 * carry no newline; it is printed below when err != 0.
	 */
	switch(err = xfer->c_bio.error) {
	case TIMEOUT:
		printf("wddump: device timed out");
		err = EIO;
		break;
	case ERR_DF:
		printf("wddump: drive fault");
		err = EIO;
		break;
	case ERR_DMA:
		printf("wddump: DMA error");
		err = EIO;
		break;
	case ERROR:
		printf("wddump: ");
		wdperror(wd, xfer);
		err = EIO;
		break;
	case NOERROR:
		err = 0;
		break;
	default:
		panic("wddump: unknown error type %d", err);
	}

	if (err != 0) {
		printf("\n");
		return err;
	}
#else	/* WD_DUMP_NOT_TRUSTED */
	/*
	 * NOTE(review): unit, cylin, head and sector are not declared
	 * in this function; this branch looks stale -- confirm.
	 */
	/* Let's just talk about this first... */
	printf("wd%d: dump addr 0x%x, cylin %d, head %d, sector %d\n",
	    unit, va, cylin, head, sector);
	delay(500 * 1000);	/* half a second */
#endif

	/* Only the success path clears the dump-in-progress flag. */
	wddoingadump = 0;
	return 0;
}
1514 
#ifdef HAS_BAD144_HANDLING
/*
 * Internalize the bad sector table: convert each cylinder/track/sector
 * entry of the label's bad144 table into an absolute sector number in
 * wd->drvp->badsect[].  Conversion stops at the first entry whose
 * cylinder is 0xffff; all remaining slots, including the extra one at
 * index NBT_BAD, are set to -1.
 */
void
bad144intern(struct wd_softc *wd)
{
	struct dk_softc *dksc = &wd->sc_dksc;
	struct dkbad *bt = &dksc->sc_dkdev.dk_cpulabel->bad;
	struct disklabel *lp = dksc->sc_dkdev.dk_label;
	int slot;

	ATADEBUG_PRINT(("bad144intern\n"), DEBUG_XFERS);

	for (slot = 0;
	    slot < NBT_BAD && bt->bt_bad[slot].bt_cyl != 0xffff;
	    slot++) {
		wd->drvp->badsect[slot] =
		    bt->bt_bad[slot].bt_cyl * lp->d_secpercyl +
		    (bt->bt_bad[slot].bt_trksec >> 8) * lp->d_nsectors +
		    (bt->bt_bad[slot].bt_trksec & 0xff);
	}

	while (slot < NBT_BAD+1) {
		wd->drvp->badsect[slot] = -1;
		slot++;
	}
}
#endif
1541 
1542 static void
1543 wd_set_geometry(struct wd_softc *wd)
1544 {
1545 	struct dk_softc *dksc = &wd->sc_dksc;
1546 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1547 
1548 	memset(dg, 0, sizeof(*dg));
1549 
1550 	dg->dg_secperunit = wd->sc_capacity;
1551 	dg->dg_secsize = wd->sc_blksize;
1552 	dg->dg_nsectors = wd->sc_params.atap_sectors;
1553 	dg->dg_ntracks = wd->sc_params.atap_heads;
1554 	if ((wd->sc_flags & WDF_LBA) == 0)
1555 		dg->dg_ncylinders = wd->sc_params.atap_cylinders;
1556 
1557 	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, NULL);
1558 }
1559 
1560 int
1561 wd_get_params(struct wd_softc *wd, uint8_t flags, struct ataparams *params)
1562 {
1563 
1564 	switch (wd->atabus->ata_get_params(wd->drvp, flags, params)) {
1565 	case CMD_AGAIN:
1566 		return 1;
1567 	case CMD_ERR:
1568 		if (wd->drvp->drive_type != ATA_DRIVET_OLD)
1569 			return 1;
1570 		/*
1571 		 * We `know' there's a drive here; just assume it's old.
1572 		 * This geometry is only used to read the MBR and print a
1573 		 * (false) attach message.
1574 		 */
1575 		strncpy(params->atap_model, "ST506",
1576 		    sizeof params->atap_model);
1577 		params->atap_config = ATA_CFG_FIXED;
1578 		params->atap_cylinders = 1024;
1579 		params->atap_heads = 8;
1580 		params->atap_sectors = 17;
1581 		params->atap_multi = 1;
1582 		params->atap_capabilities1 = params->atap_capabilities2 = 0;
1583 		wd->drvp->ata_vers = -1; /* Mark it as pre-ATA */
1584 		/* FALLTHROUGH */
1585 	case CMD_OK:
1586 		return 0;
1587 	default:
1588 		panic("wd_get_params: bad return code from ata_get_params");
1589 		/* NOTREACHED */
1590 	}
1591 }
1592 
1593 int
1594 wd_getcache(struct wd_softc *wd, int *bitsp)
1595 {
1596 	struct ataparams params;
1597 
1598 	if (wd_get_params(wd, AT_WAIT, &params) != 0)
1599 		return EIO;
1600 	if (params.atap_cmd_set1 == 0x0000 ||
1601 	    params.atap_cmd_set1 == 0xffff ||
1602 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0) {
1603 		*bitsp = 0;
1604 		return 0;
1605 	}
1606 	*bitsp = DKCACHE_WCHANGE | DKCACHE_READ;
1607 	if (params.atap_cmd1_en & WDC_CMD1_CACHE)
1608 		*bitsp |= DKCACHE_WRITE;
1609 
1610 	if (WD_USE_NCQ(wd) || (wd->drvp->drive_flags & ATA_DRIVE_WFUA))
1611 		*bitsp |= DKCACHE_FUA;
1612 
1613 	return 0;
1614 }
1615 
/*
 * Bit-name table handed to snprintb() when reporting the flags of a
 * failed command.  Old-style snprintb(3) format: the leading \20
 * selects hexadecimal output and each following octal byte is a
 * 1-based bit number introducing that bit's name.
 */
const char at_errbits[] = "\20\10ERROR\11TIMEOU\12DF";
1617 
1618 int
1619 wd_setcache(struct wd_softc *wd, int bits)
1620 {
1621 	struct dk_softc *dksc = &wd->sc_dksc;
1622 	struct ataparams params;
1623 	struct ata_xfer *xfer;
1624 	int error;
1625 
1626 	if (wd_get_params(wd, AT_WAIT, &params) != 0)
1627 		return EIO;
1628 
1629 	if (params.atap_cmd_set1 == 0x0000 ||
1630 	    params.atap_cmd_set1 == 0xffff ||
1631 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0)
1632 		return EOPNOTSUPP;
1633 
1634 	if ((bits & DKCACHE_READ) == 0 ||
1635 	    (bits & DKCACHE_SAVE) != 0)
1636 		return EOPNOTSUPP;
1637 
1638 	xfer = ata_get_xfer(wd->drvp->chnl_softc);
1639 	if (xfer == NULL)
1640 		return EINTR;
1641 
1642 	xfer->c_ata_c.r_command = SET_FEATURES;
1643 	xfer->c_ata_c.r_st_bmask = 0;
1644 	xfer->c_ata_c.r_st_pmask = 0;
1645 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1646 	xfer->c_ata_c.flags = AT_WAIT;
1647 	if (bits & DKCACHE_WRITE)
1648 		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_EN;
1649 	else
1650 		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_DS;
1651 	if (wd->atabus->ata_exec_command(wd->drvp, xfer) != ATACMD_COMPLETE) {
1652 		aprint_error_dev(dksc->sc_dev,
1653 		    "wd_setcache command not complete\n");
1654 		error = EIO;
1655 		goto out;
1656 	}
1657 
1658 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1659 		char sbuf[sizeof(at_errbits) + 64];
1660 		snprintb(sbuf, sizeof(sbuf), at_errbits, xfer->c_ata_c.flags);
1661 		aprint_error_dev(dksc->sc_dev, "wd_setcache: status=%s\n", sbuf);
1662 		error = EIO;
1663 		goto out;
1664 	}
1665 
1666 	error = 0;
1667 
1668 out:
1669 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1670 	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
1671 	return error;
1672 }
1673 
1674 static int
1675 wd_standby(struct wd_softc *wd, int flags)
1676 {
1677 	struct dk_softc *dksc = &wd->sc_dksc;
1678 	struct ata_xfer *xfer;
1679 	int error;
1680 
1681 	xfer = ata_get_xfer(wd->drvp->chnl_softc);
1682 	if (xfer == NULL)
1683 		return EINTR;
1684 
1685 	xfer->c_ata_c.r_command = WDCC_STANDBY_IMMED;
1686 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1687 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1688 	xfer->c_ata_c.flags = flags;
1689 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1690 	if (wd->atabus->ata_exec_command(wd->drvp, xfer) != ATACMD_COMPLETE) {
1691 		aprint_error_dev(dksc->sc_dev,
1692 		    "standby immediate command didn't complete\n");
1693 		error = EIO;
1694 		goto out;
1695 	}
1696 	if (xfer->c_ata_c.flags & AT_ERROR) {
1697 		if (xfer->c_ata_c.r_error == WDCE_ABRT) {
1698 			/* command not supported */
1699 			error = ENODEV;
1700 			goto out;
1701 		}
1702 	}
1703 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1704 		char sbuf[sizeof(at_errbits) + 64];
1705 		snprintb(sbuf, sizeof(sbuf), at_errbits, xfer->c_ata_c.flags);
1706 		aprint_error_dev(dksc->sc_dev, "wd_standby: status=%s\n", sbuf);
1707 		error = EIO;
1708 		goto out;
1709 	}
1710 	error = 0;
1711 
1712 out:
1713 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1714 	/* drive is supposed to go idle, do not call ata_channel_start() */
1715 	return error;
1716 }
1717 
1718 int
1719 wd_flushcache(struct wd_softc *wd, int flags, bool start)
1720 {
1721 	struct dk_softc *dksc = &wd->sc_dksc;
1722 	struct ata_xfer *xfer;
1723 	int error;
1724 
1725 	/*
1726 	 * WDCC_FLUSHCACHE is here since ATA-4, but some drives report
1727 	 * only ATA-2 and still support it.
1728 	 */
1729 	if (wd->drvp->ata_vers < 4 &&
1730 	    ((wd->sc_params.atap_cmd_set2 & WDC_CMD2_FC) == 0 ||
1731 	    wd->sc_params.atap_cmd_set2 == 0xffff))
1732 		return ENODEV;
1733 
1734 	mutex_enter(&wd->sc_lock);
1735 	SET(wd->sc_flags, WDF_FLUSH_PEND);
1736 	mutex_exit(&wd->sc_lock);
1737 
1738 	xfer = ata_get_xfer(wd->drvp->chnl_softc);
1739 
1740 	mutex_enter(&wd->sc_lock);
1741 	CLR(wd->sc_flags, WDF_FLUSH_PEND);
1742 	mutex_exit(&wd->sc_lock);
1743 
1744 	if (xfer == NULL) {
1745 		error = EINTR;
1746 		goto out;
1747 	}
1748 
1749 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0 &&
1750 	    (wd->sc_params.atap_cmd2_en & ATA_CMD2_FCE) != 0) {
1751 		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE_EXT;
1752 		flags |= AT_LBA48;
1753 	} else
1754 		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE;
1755 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1756 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1757 	xfer->c_ata_c.flags = flags | AT_READREG;
1758 	xfer->c_ata_c.timeout = 300000; /* 5m timeout */
1759 	if (wd->atabus->ata_exec_command(wd->drvp, xfer) != ATACMD_COMPLETE) {
1760 		aprint_error_dev(dksc->sc_dev,
1761 		    "flush cache command didn't complete\n");
1762 		error = EIO;
1763 		goto out_xfer;
1764 	}
1765 	if (xfer->c_ata_c.flags & AT_ERROR) {
1766 		if (xfer->c_ata_c.r_error == WDCE_ABRT) {
1767 			/* command not supported */
1768 			error = ENODEV;
1769 			goto out_xfer;
1770 		}
1771 	}
1772 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1773 		char sbuf[sizeof(at_errbits) + 64];
1774 		snprintb(sbuf, sizeof(sbuf), at_errbits, xfer->c_ata_c.flags);
1775 		aprint_error_dev(dksc->sc_dev, "wd_flushcache: status=%s\n",
1776 		    sbuf);
1777 		error = EIO;
1778 		goto out_xfer;
1779 	}
1780 	error = 0;
1781 
1782 out_xfer:
1783 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1784 
1785 out:
1786 	/* kick queue processing blocked while waiting for flush xfer */
1787 	if (start)
1788 		ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
1789 
1790 	return error;
1791 }
1792 
1793 static int
1794 wd_trim(struct wd_softc *wd, daddr_t bno, long size)
1795 {
1796 	struct dk_softc *dksc = &wd->sc_dksc;
1797 	struct ata_xfer *xfer;
1798 	int error;
1799 	unsigned char *req;
1800 
1801 	xfer = ata_get_xfer(wd->drvp->chnl_softc);
1802 	if (xfer == NULL)
1803 		return EINTR;
1804 
1805 	req = kmem_zalloc(512, KM_SLEEP);
1806 	req[0] = bno & 0xff;
1807 	req[1] = (bno >> 8) & 0xff;
1808 	req[2] = (bno >> 16) & 0xff;
1809 	req[3] = (bno >> 24) & 0xff;
1810 	req[4] = (bno >> 32) & 0xff;
1811 	req[5] = (bno >> 40) & 0xff;
1812 	req[6] = size & 0xff;
1813 	req[7] = (size >> 8) & 0xff;
1814 
1815 	xfer->c_ata_c.r_command = ATA_DATA_SET_MANAGEMENT;
1816 	xfer->c_ata_c.r_count = 1;
1817 	xfer->c_ata_c.r_features = ATA_SUPPORT_DSM_TRIM;
1818 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1819 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1820 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1821 	xfer->c_ata_c.data = req;
1822 	xfer->c_ata_c.bcount = 512;
1823 	xfer->c_ata_c.flags |= AT_WRITE | AT_WAIT;
1824 	if (wd->atabus->ata_exec_command(wd->drvp, xfer) != ATACMD_COMPLETE) {
1825 		aprint_error_dev(dksc->sc_dev,
1826 		    "trim command didn't complete\n");
1827 		kmem_free(req, 512);
1828 		error = EIO;
1829 		goto out;
1830 	}
1831 	kmem_free(req, 512);
1832 	if (xfer->c_ata_c.flags & AT_ERROR) {
1833 		if (xfer->c_ata_c.r_error == WDCE_ABRT) {
1834 			/* command not supported */
1835 			error = ENODEV;
1836 			goto out;
1837 		}
1838 	}
1839 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1840 		char sbuf[sizeof(at_errbits) + 64];
1841 		snprintb(sbuf, sizeof(sbuf), at_errbits, xfer->c_ata_c.flags);
1842 		aprint_error_dev(dksc->sc_dev, "wd_trim: status=%s\n",
1843 		    sbuf);
1844 		error = EIO;
1845 		goto out;
1846 	}
1847 	error = 0;
1848 
1849 out:
1850 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1851 	ata_channel_start(wd->drvp->chnl_softc, wd->drvp->drive);
1852 	return error;
1853 }
1854 
1855 bool
1856 wd_shutdown(device_t dev, int how)
1857 {
1858 	struct wd_softc *wd = device_private(dev);
1859 
1860 	/* the adapter needs to be enabled */
1861 	if (wd->atabus->ata_addref(wd->drvp))
1862 		return true; /* no need to complain */
1863 
1864 	wd_flushcache(wd, AT_POLL, false);
1865 	if ((how & RB_POWERDOWN) == RB_POWERDOWN)
1866 		wd_standby(wd, AT_POLL);
1867 	return true;
1868 }
1869 
1870 /*
1871  * Allocate space for a ioctl queue structure.  Mostly taken from
1872  * scsipi_ioctl.c
1873  */
1874 struct wd_ioctl *
1875 wi_get(struct wd_softc *wd)
1876 {
1877 	struct wd_ioctl *wi;
1878 
1879 	wi = malloc(sizeof(struct wd_ioctl), M_TEMP, M_WAITOK|M_ZERO);
1880 	wi->wi_softc = wd;
1881 	buf_init(&wi->wi_bp);
1882 
1883 	return (wi);
1884 }
1885 
1886 /*
1887  * Free an ioctl structure and remove it from our list
1888  */
1889 
1890 void
1891 wi_free(struct wd_ioctl *wi)
1892 {
1893 	buf_destroy(&wi->wi_bp);
1894 	free(wi, M_TEMP);
1895 }
1896 
1897 /*
1898  * Find a wd_ioctl structure based on the struct buf.
1899  */
1900 
1901 struct wd_ioctl *
1902 wi_find(struct buf *bp)
1903 {
1904 	return container_of(bp, struct wd_ioctl, wi_bp);
1905 }
1906 
1907 static uint
1908 wi_sector_size(const struct wd_ioctl * const wi)
1909 {
1910 	switch (wi->wi_atareq.command) {
1911 	case WDCC_READ:
1912 	case WDCC_WRITE:
1913 	case WDCC_READMULTI:
1914 	case WDCC_WRITEMULTI:
1915 	case WDCC_READDMA:
1916 	case WDCC_WRITEDMA:
1917 	case WDCC_READ_EXT:
1918 	case WDCC_WRITE_EXT:
1919 	case WDCC_READMULTI_EXT:
1920 	case WDCC_WRITEMULTI_EXT:
1921 	case WDCC_READDMA_EXT:
1922 	case WDCC_WRITEDMA_EXT:
1923 	case WDCC_READ_FPDMA_QUEUED:
1924 	case WDCC_WRITE_FPDMA_QUEUED:
1925 		return wi->wi_softc->sc_blksize;
1926 	default:
1927 		return 512;
1928 	}
1929 }
1930 
1931 /*
1932  * Ioctl pseudo strategy routine
1933  *
1934  * This is mostly stolen from scsipi_ioctl.c:scsistrategy().  What
1935  * happens here is:
1936  *
1937  * - wdioctl() queues a wd_ioctl structure.
1938  *
1939  * - wdioctl() calls physio/wdioctlstrategy based on whether or not
1940  *   user space I/O is required.  If physio() is called, physio() eventually
1941  *   calls wdioctlstrategy().
1942  *
1943  * - In either case, wdioctlstrategy() calls wd->atabus->ata_exec_command()
1944  *   to perform the actual command
1945  *
1946  * The reason for the use of the pseudo strategy routine is because
1947  * when doing I/O to/from user space, physio _really_ wants to be in
1948  * the loop.  We could put the entire buffer into the ioctl request
1949  * structure, but that won't scale if we want to do things like download
1950  * microcode.
1951  */
1952 
1953 void
1954 wdioctlstrategy(struct buf *bp)
1955 {
1956 	struct wd_ioctl *wi;
1957 	struct ata_xfer *xfer;
1958 	int error = 0;
1959 
1960 	wi = wi_find(bp);
1961 	if (wi == NULL) {
1962 		printf("wdioctlstrategy: "
1963 		    "No matching ioctl request found in queue\n");
1964 		error = EINVAL;
1965 		goto out2;
1966 	}
1967 
1968 	xfer = ata_get_xfer(wi->wi_softc->drvp->chnl_softc);
1969 	if (xfer == NULL) {
1970 		error = EINTR;
1971 		goto out2;
1972 	}
1973 
1974 	/*
1975 	 * Abort if physio broke up the transfer
1976 	 */
1977 
1978 	if (bp->b_bcount != wi->wi_atareq.datalen) {
1979 		printf("physio split wd ioctl request... cannot proceed\n");
1980 		error = EIO;
1981 		goto out;
1982 	}
1983 
1984 	/*
1985 	 * Abort if we didn't get a buffer size that was a multiple of
1986 	 * our sector size (or overflows CHS/LBA28 sector count)
1987 	 */
1988 
1989 	if ((bp->b_bcount % wi_sector_size(wi)) != 0 ||
1990 	    (bp->b_bcount / wi_sector_size(wi)) >=
1991 	     (1 << NBBY)) {
1992 		error = EINVAL;
1993 		goto out;
1994 	}
1995 
1996 	/*
1997 	 * Make sure a timeout was supplied in the ioctl request
1998 	 */
1999 
2000 	if (wi->wi_atareq.timeout == 0) {
2001 		error = EINVAL;
2002 		goto out;
2003 	}
2004 
2005 	if (wi->wi_atareq.flags & ATACMD_READ)
2006 		xfer->c_ata_c.flags |= AT_READ;
2007 	else if (wi->wi_atareq.flags & ATACMD_WRITE)
2008 		xfer->c_ata_c.flags |= AT_WRITE;
2009 
2010 	if (wi->wi_atareq.flags & ATACMD_READREG)
2011 		xfer->c_ata_c.flags |= AT_READREG;
2012 
2013 	if ((wi->wi_atareq.flags & ATACMD_LBA) != 0)
2014 		xfer->c_ata_c.flags |= AT_LBA;
2015 
2016 	xfer->c_ata_c.flags |= AT_WAIT;
2017 
2018 	xfer->c_ata_c.timeout = wi->wi_atareq.timeout;
2019 	xfer->c_ata_c.r_command = wi->wi_atareq.command;
2020 	xfer->c_ata_c.r_lba = ((wi->wi_atareq.head & 0x0f) << 24) |
2021 	    (wi->wi_atareq.cylinder << 8) |
2022 	    wi->wi_atareq.sec_num;
2023 	xfer->c_ata_c.r_count = wi->wi_atareq.sec_count;
2024 	xfer->c_ata_c.r_features = wi->wi_atareq.features;
2025 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
2026 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
2027 	xfer->c_ata_c.data = wi->wi_bp.b_data;
2028 	xfer->c_ata_c.bcount = wi->wi_bp.b_bcount;
2029 
2030 	if (wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, xfer)
2031 	    != ATACMD_COMPLETE) {
2032 		wi->wi_atareq.retsts = ATACMD_ERROR;
2033 		error = EIO;
2034 		goto out;
2035 	}
2036 
2037 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
2038 		if (xfer->c_ata_c.flags & AT_ERROR) {
2039 			wi->wi_atareq.retsts = ATACMD_ERROR;
2040 			wi->wi_atareq.error = xfer->c_ata_c.r_error;
2041 		} else if (xfer->c_ata_c.flags & AT_DF)
2042 			wi->wi_atareq.retsts = ATACMD_DF;
2043 		else
2044 			wi->wi_atareq.retsts = ATACMD_TIMEOUT;
2045 	} else {
2046 		wi->wi_atareq.retsts = ATACMD_OK;
2047 		if (wi->wi_atareq.flags & ATACMD_READREG) {
2048 			wi->wi_atareq.command = xfer->c_ata_c.r_status;
2049 			wi->wi_atareq.features = xfer->c_ata_c.r_error;
2050 			wi->wi_atareq.sec_count = xfer->c_ata_c.r_count;
2051 			wi->wi_atareq.sec_num = xfer->c_ata_c.r_lba & 0xff;
2052 			wi->wi_atareq.head = (xfer->c_ata_c.r_device & 0xf0) |
2053 			    ((xfer->c_ata_c.r_lba >> 24) & 0x0f);
2054 			wi->wi_atareq.cylinder =
2055 			    (xfer->c_ata_c.r_lba >> 8) & 0xffff;
2056 			wi->wi_atareq.error = xfer->c_ata_c.r_error;
2057 		}
2058 	}
2059 
2060 out:
2061 	ata_free_xfer(wi->wi_softc->drvp->chnl_softc, xfer);
2062 	ata_channel_start(wi->wi_softc->drvp->chnl_softc,
2063 	    wi->wi_softc->drvp->drive);
2064 out2:
2065 	bp->b_error = error;
2066 	if (error)
2067 		bp->b_resid = bp->b_bcount;
2068 	biodone(bp);
2069 }
2070 
2071 static void
2072 wd_sysctl_attach(struct wd_softc *wd)
2073 {
2074 	struct dk_softc *dksc = &wd->sc_dksc;
2075 	const struct sysctlnode *node;
2076 	int error;
2077 
2078 	/* sysctl set-up */
2079 	if (sysctl_createv(&wd->nodelog, 0, NULL, &node,
2080 				0, CTLTYPE_NODE, dksc->sc_xname,
2081 				SYSCTL_DESCR("wd driver settings"),
2082 				NULL, 0, NULL, 0,
2083 				CTL_HW, CTL_CREATE, CTL_EOL) != 0) {
2084 		aprint_error_dev(dksc->sc_dev,
2085 		    "could not create %s.%s sysctl node\n",
2086 		    "hw", dksc->sc_xname);
2087 		return;
2088 	}
2089 
2090 	wd->drv_max_tags = ATA_MAX_OPENINGS;
2091 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2092 				CTLFLAG_READWRITE, CTLTYPE_INT, "max_tags",
2093 				SYSCTL_DESCR("max number of NCQ tags to use"),
2094 				NULL, 0, &wd->drv_max_tags, 0,
2095 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2096 				!= 0) {
2097 		aprint_error_dev(dksc->sc_dev,
2098 		    "could not create %s.%s.max_tags sysctl - error %d\n",
2099 		    "hw", dksc->sc_xname, error);
2100 		return;
2101 	}
2102 
2103 	wd->drv_ncq = true;
2104 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2105 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq",
2106 				SYSCTL_DESCR("use NCQ if supported"),
2107 				NULL, 0, &wd->drv_ncq, 0,
2108 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2109 				!= 0) {
2110 		aprint_error_dev(dksc->sc_dev,
2111 		    "could not create %s.%s.use_ncq sysctl - error %d\n",
2112 		    "hw", dksc->sc_xname, error);
2113 		return;
2114 	}
2115 
2116 	wd->drv_ncq_prio = false;
2117 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2118 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq_prio",
2119 				SYSCTL_DESCR("use NCQ PRIORITY if supported"),
2120 				NULL, 0, &wd->drv_ncq_prio, 0,
2121 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2122 				!= 0) {
2123 		aprint_error_dev(dksc->sc_dev,
2124 		    "could not create %s.%s.use_ncq_prio sysctl - error %d\n",
2125 		    "hw", dksc->sc_xname, error);
2126 		return;
2127 	}
2128 
2129 #ifdef WD_CHAOS_MONKEY
2130 	wd->drv_chaos_freq = 0;
2131 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2132 				CTLFLAG_READWRITE, CTLTYPE_INT, "chaos_freq",
2133 				SYSCTL_DESCR("simulated bio read error rate"),
2134 				NULL, 0, &wd->drv_chaos_freq, 0,
2135 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2136 				!= 0) {
2137 		aprint_error_dev(dksc->sc_dev,
2138 		    "could not create %s.%s.chaos_freq sysctl - error %d\n",
2139 		    "hw", dksc->sc_xname, error);
2140 		return;
2141 	}
2142 
2143 	wd->drv_chaos_cnt = 0;
2144 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2145 				CTLFLAG_READONLY, CTLTYPE_INT, "chaos_cnt",
2146 				SYSCTL_DESCR("number of processed bio reads"),
2147 				NULL, 0, &wd->drv_chaos_cnt, 0,
2148 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2149 				!= 0) {
2150 		aprint_error_dev(dksc->sc_dev,
2151 		    "could not create %s.%s.chaos_cnt sysctl - error %d\n",
2152 		    "hw", dksc->sc_xname, error);
2153 		return;
2154 	}
2155 #endif
2156 
2157 }
2158 
2159 static void
2160 wd_sysctl_detach(struct wd_softc *wd)
2161 {
2162 	sysctl_teardown(&wd->nodelog);
2163 }
2164 
2165