xref: /netbsd-src/sys/dev/ata/wd.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: wd.c,v 1.465 2020/09/28 12:47:49 jakllsch Exp $ */
2 
3 /*
4  * Copyright (c) 1998, 2001 Manuel Bouyer.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *	notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *	notice, this list of conditions and the following disclaimer in the
13  *	documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*-
28  * Copyright (c) 1998, 2003, 2004 The NetBSD Foundation, Inc.
29  * All rights reserved.
30  *
31  * This code is derived from software contributed to The NetBSD Foundation
32  * by Charles M. Hannum and by Onno van der Linden.
33  *
34  * Redistribution and use in source and binary forms, with or without
35  * modification, are permitted provided that the following conditions
36  * are met:
37  * 1. Redistributions of source code must retain the above copyright
38  *    notice, this list of conditions and the following disclaimer.
39  * 2. Redistributions in binary form must reproduce the above copyright
40  *    notice, this list of conditions and the following disclaimer in the
41  *    documentation and/or other materials provided with the distribution.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
44  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
45  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
46  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
47  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
48  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
49  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
50  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
51  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
52  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
53  * POSSIBILITY OF SUCH DAMAGE.
54  */
55 
56 #include <sys/cdefs.h>
57 __KERNEL_RCSID(0, "$NetBSD: wd.c,v 1.465 2020/09/28 12:47:49 jakllsch Exp $");
58 
59 #include "opt_ata.h"
60 #include "opt_wd.h"
61 
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/kernel.h>
65 #include <sys/conf.h>
66 #include <sys/file.h>
67 #include <sys/stat.h>
68 #include <sys/ioctl.h>
69 #include <sys/buf.h>
70 #include <sys/bufq.h>
71 #include <sys/uio.h>
72 #include <sys/device.h>
73 #include <sys/disklabel.h>
74 #include <sys/disk.h>
75 #include <sys/syslog.h>
76 #include <sys/proc.h>
77 #include <sys/reboot.h>
78 #include <sys/vnode.h>
79 #include <sys/rndsource.h>
80 
81 #include <sys/intr.h>
82 #include <sys/bus.h>
83 
84 #include <dev/ata/atareg.h>
85 #include <dev/ata/atavar.h>
86 #include <dev/ata/wdvar.h>
87 #include <dev/ic/wdcreg.h>
88 #include <sys/ataio.h>
89 #include "locators.h"
90 
91 #include <prop/proplib.h>
92 
93 #define	WDIORETRIES_SINGLE 4	/* number of retries for single-sector */
94 #define	WDIORETRIES	5	/* number of retries before giving up */
95 #define	RECOVERYTIME hz/2	/* time to wait before retrying a cmd */
96 
97 #define	WDUNIT(dev)		DISKUNIT(dev)
98 #define	WDPART(dev)		DISKPART(dev)
99 #define	WDMINOR(unit, part)	DISKMINOR(unit, part)
100 #define	MAKEWDDEV(maj, unit, part)	MAKEDISKDEV(maj, unit, part)
101 
102 #define	WDLABELDEV(dev)	(MAKEWDDEV(major(dev), WDUNIT(dev), RAW_PART))
103 
104 #define DEBUG_FUNCS  0x08
105 #define DEBUG_PROBE  0x10
106 #define DEBUG_DETACH 0x20
107 #define	DEBUG_XFERS  0x40
108 #ifdef ATADEBUG
109 #ifndef ATADEBUG_WD_MASK
110 #define ATADEBUG_WD_MASK 0x0
111 #endif
112 int wdcdebug_wd_mask = ATADEBUG_WD_MASK;
113 #define ATADEBUG_PRINT(args, level) \
114 	if (wdcdebug_wd_mask & (level)) \
115 		printf args
116 #else
117 #define ATADEBUG_PRINT(args, level)
118 #endif
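/*
 * With ATADEBUG, wdcdebug_wd_mask can be set at build time via the
 * ATADEBUG_WD_MASK option or patched at run time; e.g. a mask of 0x50
 * (DEBUG_PROBE | DEBUG_XFERS) enables probe and transfer tracing.
 */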
119 
120 static int	wdprobe(device_t, cfdata_t, void *);
121 static void	wdattach(device_t, device_t, void *);
122 static int	wddetach(device_t, int);
123 static void	wdperror(const struct wd_softc *, struct ata_xfer *);
124 
125 static void	wdminphys(struct buf *);
126 
127 static int	wd_firstopen(device_t, dev_t, int, int);
128 static int	wd_lastclose(device_t);
129 static bool	wd_suspend(device_t, const pmf_qual_t *);
130 static int	wd_standby(struct wd_softc *, int);
131 
132 CFATTACH_DECL3_NEW(wd, sizeof(struct wd_softc),
133     wdprobe, wdattach, wddetach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN);
134 
135 extern struct cfdriver wd_cd;
136 
137 static dev_type_open(wdopen);
138 static dev_type_close(wdclose);
139 static dev_type_read(wdread);
140 static dev_type_write(wdwrite);
141 static dev_type_ioctl(wdioctl);
142 static dev_type_strategy(wdstrategy);
143 static dev_type_dump(wddump);
144 static dev_type_size(wdsize);
145 static dev_type_discard(wddiscard);
146 
147 const struct bdevsw wd_bdevsw = {
148 	.d_open = wdopen,
149 	.d_close = wdclose,
150 	.d_strategy = wdstrategy,
151 	.d_ioctl = wdioctl,
152 	.d_dump = wddump,
153 	.d_psize = wdsize,
154 	.d_discard = wddiscard,
155 	.d_flag = D_DISK
156 };
157 
158 const struct cdevsw wd_cdevsw = {
159 	.d_open = wdopen,
160 	.d_close = wdclose,
161 	.d_read = wdread,
162 	.d_write = wdwrite,
163 	.d_ioctl = wdioctl,
164 	.d_stop = nostop,
165 	.d_tty = notty,
166 	.d_poll = nopoll,
167 	.d_mmap = nommap,
168 	.d_kqfilter = nokqfilter,
169 	.d_discard = wddiscard,
170 	.d_flag = D_DISK
171 };
172 
173 /* #define WD_DUMP_NOT_TRUSTED if you just want to watch */
174 static int wddoingadump = 0;
175 static int wddumprecalibrated = 0;
176 
177 /*
178  * Glue necessary to hook ATAIOCCOMMAND into physio
179  */
180 
181 struct wd_ioctl {
182 	LIST_ENTRY(wd_ioctl) wi_list;
183 	struct buf wi_bp;
184 	struct uio wi_uio;
185 	struct iovec wi_iov;
186 	atareq_t wi_atareq;
187 	struct wd_softc *wi_softc;
188 };
189 
190 static struct	wd_ioctl *wi_find(struct buf *);
191 static void	wi_free(struct wd_ioctl *);
192 static struct	wd_ioctl *wi_get(struct wd_softc *);
193 static void	wdioctlstrategy(struct buf *);
194 
195 static void	wdrestart(void *);
196 static void	wdstart1(struct wd_softc *, struct buf *, struct ata_xfer *);
197 static int	wd_diskstart(device_t, struct buf *);
198 static int	wd_dumpblocks(device_t, void *, daddr_t, int);
199 static void	wd_iosize(device_t, int *);
200 static int	wd_discard(device_t, off_t, off_t);
201 static void	wdbioretry(void *);
202 static void	wdbiorequeue(void *);
203 static void	wddone(device_t, struct ata_xfer *);
204 static int	wd_get_params(struct wd_softc *, struct ataparams *);
205 static void	wd_set_geometry(struct wd_softc *);
206 static int	wd_flushcache(struct wd_softc *, int);
207 static int	wd_trim(struct wd_softc *, daddr_t, long);
208 static bool	wd_shutdown(device_t, int);
209 
210 static int wd_getcache(struct wd_softc *, int *);
211 static int wd_setcache(struct wd_softc *, int);
212 
213 static void wd_sysctl_attach(struct wd_softc *);
214 static void wd_sysctl_detach(struct wd_softc *);
215 
216 static const struct dkdriver wddkdriver = {
217 	.d_open = wdopen,
218 	.d_close = wdclose,
219 	.d_strategy = wdstrategy,
220 	.d_minphys = wdminphys,
221 	.d_diskstart = wd_diskstart,
222 	.d_dumpblocks = wd_dumpblocks,
223 	.d_iosize = wd_iosize,
224 	.d_firstopen = wd_firstopen,
225 	.d_lastclose = wd_lastclose,
226 	.d_discard = wd_discard
227 };
228 
229 #ifdef HAS_BAD144_HANDLING
230 static void bad144intern(struct wd_softc *);
231 #endif
232 
233 #define	WD_QUIRK_SPLIT_MOD15_WRITE	0x0001	/* must split certain writes */
234 
235 #define	WD_QUIRK_FMT "\20\1SPLIT_MOD15_WRITE"
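/*
 * WD_QUIRK_FMT is an snprintb(9) format string: the leading \20 selects
 * hexadecimal output and \1 names the least significant bit, so a quirks
 * value of 0x0001 prints as "0x1<SPLIT_MOD15_WRITE>".
 */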
236 
237 /*
238  * Quirk table for IDE drives.  Put more-specific matches first, since
239  * a simple globbing routine is used for matching.
240  */
241 static const struct wd_quirk {
242 	const char *wdq_match;		/* inquiry pattern to match */
243 	int wdq_quirks;			/* drive quirks */
244 } wd_quirk_table[] = {
245 	/*
246 	 * Some Seagate S-ATA drives have a PHY which can get confused
247 	 * with the way data is packetized by some S-ATA controllers.
248 	 *
249 	 * The work-around is to split in two any write transfer whose
250 	 * sector count % 15 == 1 (assuming 512 byte sectors).
251 	 *
252 	 * XXX This is an incomplete list.  There are at least a couple
253 	 * XXX more model numbers.  If you have trouble with such transfers
254 	 * XXX (8K is the most common) on Seagate S-ATA drives, please
255 	 * XXX notify thorpej@NetBSD.org.
256 	 *
257 	 * The ST360015AS has not yet been confirmed to have this
258 	 * issue; however, it is the only other drive in the
259 	 * Seagate Barracuda Serial ATA V family.
260 	 *
261 	 */
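	/*
	 * For example, an 8 KiB write of 512-byte sectors is 16 sectors,
	 * and 16 % 15 == 1, so it falls into the problematic case described
	 * above and must be split in two.
	 */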
262 	{ "ST3120023AS", WD_QUIRK_SPLIT_MOD15_WRITE },
263 	{ "ST380023AS", WD_QUIRK_SPLIT_MOD15_WRITE },
264 	{ "ST360015AS", WD_QUIRK_SPLIT_MOD15_WRITE },
265 	{ NULL,
266 	  0 }
267 };
268 
269 static const struct wd_quirk *
270 wd_lookup_quirks(const char *name)
271 {
272 	const struct wd_quirk *wdq;
273 	const char *estr;
274 
275 	for (wdq = wd_quirk_table; wdq->wdq_match != NULL; wdq++) {
276 		/*
277 		 * We only want exact matches (which include matches
278 		 * against globbing characters).
279 		 */
280 		if (pmatch(name, wdq->wdq_match, &estr) == 2)
281 			return (wdq);
282 	}
283 	return (NULL);
284 }
285 
286 static int
287 wdprobe(device_t parent, cfdata_t match, void *aux)
288 {
289 	struct ata_device *adev = aux;
290 
291 	if (adev == NULL)
292 		return 0;
293 	if (adev->adev_bustype->bustype_type != SCSIPI_BUSTYPE_ATA)
294 		return 0;
295 
296 	if (match->cf_loc[ATA_HLCF_DRIVE] != ATA_HLCF_DRIVE_DEFAULT &&
297 	    match->cf_loc[ATA_HLCF_DRIVE] != adev->adev_drv_data->drive)
298 		return 0;
299 	return 1;
300 }
301 
302 static void
303 wdattach(device_t parent, device_t self, void *aux)
304 {
305 	struct wd_softc *wd = device_private(self);
306 	struct dk_softc *dksc = &wd->sc_dksc;
307 	struct ata_device *adev= aux;
308 	int i, blank;
309 	char tbuf[41],pbuf[9], c, *p, *q;
310 	const struct wd_quirk *wdq;
311 	int dtype = DKTYPE_UNKNOWN;
312 
313 	dksc->sc_dev = self;
314 
315 	ATADEBUG_PRINT(("wdattach\n"), DEBUG_FUNCS | DEBUG_PROBE);
316 	mutex_init(&wd->sc_lock, MUTEX_DEFAULT, IPL_BIO);
317 #ifdef WD_SOFTBADSECT
318 	SLIST_INIT(&wd->sc_bslist);
319 #endif
320 	wd->atabus = adev->adev_bustype;
321 	wd->inflight = 0;
322 	wd->drvp = adev->adev_drv_data;
323 
324 	wd->drvp->drv_openings = 1;
325 	wd->drvp->drv_done = wddone;
326 	wd->drvp->drv_softc = dksc->sc_dev; /* done in atabusconfig_thread()
327 					     but too late */
328 
329 	SLIST_INIT(&wd->sc_retry_list);
330 	SLIST_INIT(&wd->sc_requeue_list);
331 	callout_init(&wd->sc_retry_callout, 0);		/* XXX MPSAFE */
332 	callout_init(&wd->sc_requeue_callout, 0);	/* XXX MPSAFE */
333 	callout_init(&wd->sc_restart_diskqueue, 0);	/* XXX MPSAFE */
334 
335 	aprint_naive("\n");
336 	aprint_normal("\n");
337 
338 	/* read our drive info */
339 	if (wd_get_params(wd, &wd->sc_params) != 0) {
340 		aprint_error_dev(self, "IDENTIFY failed\n");
341 		goto out;
342 	}
343 
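	/*
	 * Build a printable model name in tbuf: collapse runs of spaces in
	 * the space-padded IDENTIFY model string and NUL-terminate the result.
	 */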
344 	for (blank = 0, p = wd->sc_params.atap_model, q = tbuf, i = 0;
345 	    i < sizeof(wd->sc_params.atap_model); i++) {
346 		c = *p++;
347 		if (c == '\0')
348 			break;
349 		if (c != ' ') {
350 			if (blank) {
351 				*q++ = ' ';
352 				blank = 0;
353 			}
354 			*q++ = c;
355 		} else
356 			blank = 1;
357 	}
358 	*q++ = '\0';
359 
360 	wd->sc_typename = kmem_asprintf("%s", tbuf);
361 	aprint_normal_dev(self, "<%s>\n", wd->sc_typename);
362 
363 	wdq = wd_lookup_quirks(tbuf);
364 	if (wdq != NULL)
365 		wd->sc_quirks = wdq->wdq_quirks;
366 
367 	if (wd->sc_quirks != 0) {
368 		char sbuf[sizeof(WD_QUIRK_FMT) + 64];
369 		snprintb(sbuf, sizeof(sbuf), WD_QUIRK_FMT, wd->sc_quirks);
370 		aprint_normal_dev(self, "quirks %s\n", sbuf);
371 
372 		if (wd->sc_quirks & WD_QUIRK_SPLIT_MOD15_WRITE) {
373 			aprint_error_dev(self, "drive corrupts write transfers with certain controllers, consider replacing\n");
374 		}
375 	}
376 
377 	if ((wd->sc_params.atap_multi & 0xff) > 1) {
378 		wd->drvp->multi = wd->sc_params.atap_multi & 0xff;
379 	} else {
380 		wd->drvp->multi = 1;
381 	}
382 
383 	aprint_verbose_dev(self, "drive supports %d-sector PIO transfers,",
384 	    wd->drvp->multi);
385 
386 	/* 48-bit LBA addressing */
387 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0)
388 		wd->sc_flags |= WDF_LBA48;
389 
390 	/* Prior to ATA-4, LBA was optional. */
391 	if ((wd->sc_params.atap_capabilities1 & WDC_CAP_LBA) != 0)
392 		wd->sc_flags |= WDF_LBA;
393 #if 0
394 	/* ATA-4 requires LBA. */
395 	if (wd->sc_params.atap_ataversion != 0xffff &&
396 	    wd->sc_params.atap_ataversion >= WDC_VER_ATA4)
397 		wd->sc_flags |= WDF_LBA;
398 #endif
399 
400 	if ((wd->sc_flags & WDF_LBA48) != 0) {
401 		aprint_verbose(" LBA48 addressing\n");
402 		wd->sc_capacity =
403 		    ((uint64_t) wd->sc_params.atap_max_lba[3] << 48) |
404 		    ((uint64_t) wd->sc_params.atap_max_lba[2] << 32) |
405 		    ((uint64_t) wd->sc_params.atap_max_lba[1] << 16) |
406 		    ((uint64_t) wd->sc_params.atap_max_lba[0] <<  0);
407 		wd->sc_capacity28 =
408 		    (wd->sc_params.atap_capacity[1] << 16) |
409 		    wd->sc_params.atap_capacity[0];
410 	} else if ((wd->sc_flags & WDF_LBA) != 0) {
411 		aprint_verbose(" LBA addressing\n");
412 		wd->sc_capacity28 = wd->sc_capacity =
413 		    (wd->sc_params.atap_capacity[1] << 16) |
414 		    wd->sc_params.atap_capacity[0];
415 	} else {
416 		aprint_verbose(" chs addressing\n");
417 		wd->sc_capacity28 = wd->sc_capacity =
418 		    wd->sc_params.atap_cylinders *
419 		    wd->sc_params.atap_heads *
420 		    wd->sc_params.atap_sectors;
421 	}
422 	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
423 	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LLS) != 0)) {
424 		wd->sc_blksize = 2ULL *
425 		    ((uint32_t)((wd->sc_params.atap_lls_secsz[1] << 16) |
426 		    wd->sc_params.atap_lls_secsz[0]));
427 	} else {
428 		wd->sc_blksize = 512;
429 	}
430 	wd->sc_sectoralign.dsa_firstaligned = 0;
431 	wd->sc_sectoralign.dsa_alignment = 1;
432 	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
433 	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LPS) != 0)) {
434 		wd->sc_sectoralign.dsa_alignment = 1 <<
435 		    (wd->sc_params.atap_secsz & ATA_SECSZ_LPS_SZMSK);
436 		if ((wd->sc_params.atap_logical_align & ATA_LA_VALID_MASK) ==
437 		    ATA_LA_VALID) {
438 			wd->sc_sectoralign.dsa_firstaligned =
439 			    (wd->sc_sectoralign.dsa_alignment -
440 				(wd->sc_params.atap_logical_align &
441 				    ATA_LA_MASK));
442 		}
443 	}
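	/*
	 * For example (a sketch, assuming 512-byte logical sectors): a drive
	 * with 4096-byte physical sectors reports an alignment of 8, and if
	 * its logical-to-physical offset is 1, the first physically aligned
	 * LBA is 8 - 1 = 7.
	 */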
444 	wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
445 	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
446 	aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
447 	    "%d bytes/sect x %llu sectors",
448 	    pbuf,
449 	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
450 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
451 		wd->sc_params.atap_cylinders,
452 	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
453 	    wd->sc_blksize, (unsigned long long)wd->sc_capacity);
454 	if (wd->sc_sectoralign.dsa_alignment != 1) {
455 		aprint_normal(" (%d bytes/physsect",
456 		    wd->sc_sectoralign.dsa_alignment * wd->sc_blksize);
457 		if (wd->sc_sectoralign.dsa_firstaligned != 0) {
458 			aprint_normal("; first aligned sector: %jd",
459 			    (intmax_t)wd->sc_sectoralign.dsa_firstaligned);
460 		}
461 		aprint_normal(")");
462 	}
463 	aprint_normal("\n");
464 
465 	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
466 	    device_xname(self), wd->sc_params.atap_dmatiming_mimi,
467 	    wd->sc_params.atap_dmatiming_recom), DEBUG_PROBE);
468 
469 	if (wd->sc_blksize <= 0 || !powerof2(wd->sc_blksize) ||
470 	    wd->sc_blksize < DEV_BSIZE || wd->sc_blksize > MAXPHYS) {
471 		aprint_normal_dev(self, "WARNING: block size %u "
472 		    "might not actually work\n", wd->sc_blksize);
473 	}
474 
475 	if (strcmp(wd->sc_params.atap_model, "ST506") == 0)
476 		dtype = DKTYPE_ST506;
477 	else
478 		dtype = DKTYPE_ESDI;
479 
480 out:
481 	/*
482 	 * Initialize and attach the disk structure.
483 	 */
484 	dk_init(dksc, self, dtype);
485 	disk_init(&dksc->sc_dkdev, dksc->sc_xname, &wddkdriver);
486 
487 	/* Attach dk and disk subsystems */
488 	dk_attach(dksc);
489 	disk_attach(&dksc->sc_dkdev);
490 	wd_set_geometry(wd);
491 
492 	bufq_alloc(&dksc->sc_bufq, BUFQ_DISK_DEFAULT_STRAT, BUFQ_SORT_RAWBLOCK);
493 
494 	/* reference to label structure, used by ata code */
495 	wd->drvp->lp = dksc->sc_dkdev.dk_label;
496 
497 	/* Discover wedges on this disk. */
498 	dkwedge_discover(&dksc->sc_dkdev);
499 
500 	if (!pmf_device_register1(self, wd_suspend, NULL, wd_shutdown))
501 		aprint_error_dev(self, "couldn't establish power handler\n");
502 
503 	wd_sysctl_attach(wd);
504 }
505 
506 static bool
507 wd_suspend(device_t dv, const pmf_qual_t *qual)
508 {
509 	struct wd_softc *sc = device_private(dv);
510 
511 	/* the adapter needs to be enabled */
512 	if (sc->atabus->ata_addref(sc->drvp))
513 		return true; /* no need to complain */
514 
515 	wd_flushcache(sc, AT_WAIT);
516 	wd_standby(sc, AT_WAIT);
517 
518 	sc->atabus->ata_delref(sc->drvp);
519 	return true;
520 }
521 
522 static int
523 wddetach(device_t self, int flags)
524 {
525 	struct wd_softc *wd = device_private(self);
526 	struct dk_softc *dksc = &wd->sc_dksc;
527 	int bmaj, cmaj, i, mn, rc;
528 
529 	if ((rc = disk_begindetach(&dksc->sc_dkdev, wd_lastclose, self, flags)) != 0)
530 		return rc;
531 
532 	/* locate the major number */
533 	bmaj = bdevsw_lookup_major(&wd_bdevsw);
534 	cmaj = cdevsw_lookup_major(&wd_cdevsw);
535 
536 	/* Nuke the vnodes for any open instances. */
537 	for (i = 0; i < MAXPARTITIONS; i++) {
538 		mn = WDMINOR(device_unit(self), i);
539 		vdevgone(bmaj, mn, mn, VBLK);
540 		vdevgone(cmaj, mn, mn, VCHR);
541 	}
542 
543 	dk_drain(dksc);
544 
545 	/* Kill off any pending commands. */
546 	mutex_enter(&wd->sc_lock);
547 	wd->atabus->ata_killpending(wd->drvp);
548 
549 	callout_halt(&wd->sc_retry_callout, &wd->sc_lock);
550 	callout_destroy(&wd->sc_retry_callout);
551 	callout_halt(&wd->sc_requeue_callout, &wd->sc_lock);
552 	callout_destroy(&wd->sc_requeue_callout);
553 	callout_halt(&wd->sc_restart_diskqueue, &wd->sc_lock);
554 	callout_destroy(&wd->sc_restart_diskqueue);
555 
556 	mutex_exit(&wd->sc_lock);
557 
558 	bufq_free(dksc->sc_bufq);
559 
560 	/* Delete all of our wedges. */
561 	dkwedge_delall(&dksc->sc_dkdev);
562 
563 	if (flags & DETACH_POWEROFF)
564 		wd_standby(wd, AT_POLL);
565 
566 	/* Detach from the disk list. */
567 	disk_detach(&dksc->sc_dkdev);
568 	disk_destroy(&dksc->sc_dkdev);
569 
570 	dk_detach(dksc);
571 
572 #ifdef WD_SOFTBADSECT
573 	/* Clean out the bad sector list */
574 	while (!SLIST_EMPTY(&wd->sc_bslist)) {
575 		struct disk_badsectors *dbs = SLIST_FIRST(&wd->sc_bslist);
576 		SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
577 		kmem_free(dbs, sizeof(*dbs));
578 	}
579 	wd->sc_bscount = 0;
580 #endif
581 	if (wd->sc_typename != NULL) {
582 		kmem_free(wd->sc_typename, strlen(wd->sc_typename) + 1);
583 		wd->sc_typename = NULL;
584 	}
585 
586 	pmf_device_deregister(self);
587 
588 	wd_sysctl_detach(wd);
589 
590 	mutex_destroy(&wd->sc_lock);
591 
592 	wd->drvp->drive_type = ATA_DRIVET_NONE; /* no drive any more here */
593 	wd->drvp->drive_flags = 0;
594 
595 	return (0);
596 }
597 
598 /*
599  * Read/write routine for a buffer.  Validates the arguments and schedules the
600  * transfer.  Does not wait for the transfer to complete.
601  */
602 static void
603 wdstrategy(struct buf *bp)
604 {
605 	struct wd_softc *wd =
606 	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
607 	struct dk_softc *dksc = &wd->sc_dksc;
608 
609 	ATADEBUG_PRINT(("wdstrategy (%s)\n", dksc->sc_xname),
610 	    DEBUG_XFERS);
611 
612 	/* If the device has been invalidated (e.g. media change, door open,
613 	 * device detachment), then fail the request.
614 	 */
615 	if ((wd->sc_flags & WDF_LOADED) == 0 ||
616 	    !device_is_enabled(dksc->sc_dev))
617 		goto err;
618 
619 #ifdef WD_SOFTBADSECT
620 	/*
621 	 * If the transfer about to be attempted contains a block that is
622 	 * known to be bad, then return an error for the transfer without
623 	 * even attempting to start it, on the premise that we would just
624 	 * end up doing more retries for a transfer that will end up
625 	 * failing again.
626 	 */
627 	if (__predict_false(!SLIST_EMPTY(&wd->sc_bslist))) {
628 		struct disklabel *lp = dksc->sc_dkdev.dk_label;
629 		struct disk_badsectors *dbs;
630 		daddr_t blkno, maxblk;
631 
632 		/* convert the block number to absolute */
633 		if (lp->d_secsize >= DEV_BSIZE)
634 			blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
635 		else
636 			blkno = bp->b_blkno * (DEV_BSIZE / lp->d_secsize);
637 		if (WDPART(bp->b_dev) != RAW_PART)
638 			blkno += lp->d_partitions[WDPART(bp->b_dev)].p_offset;
639 		maxblk = blkno + (bp->b_bcount / wd->sc_blksize) - 1;
640 
641 		mutex_enter(&wd->sc_lock);
642 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next)
643 			if ((dbs->dbs_min <= bp->b_rawblkno &&
644 			     bp->b_rawblkno <= dbs->dbs_max) ||
645 			    (dbs->dbs_min <= maxblk && maxblk <= dbs->dbs_max)){
646 				mutex_exit(&wd->sc_lock);
647 				goto err;
648 			}
649 		mutex_exit(&wd->sc_lock);
650 	}
651 #endif
652 
653 	dk_strategy(dksc, bp);
654 	return;
655 
656 err:
657 	bp->b_error = EIO;
658 	bp->b_resid = bp->b_bcount;
659 	biodone(bp);
660 }
661 
662 static void
663 wdstart1(struct wd_softc *wd, struct buf *bp, struct ata_xfer *xfer)
664 {
665 	struct dk_softc *dksc = &wd->sc_dksc;
666 	const uint32_t secsize = dksc->sc_dkdev.dk_geom.dg_secsize;
667 
668 	KASSERT(bp == xfer->c_bio.bp || xfer->c_bio.bp == NULL);
669 	KASSERT((xfer->c_flags & (C_WAITACT|C_FREE)) == 0);
670 	KASSERT(mutex_owned(&wd->sc_lock));
671 
672 	/* Reset state, so that retries don't use stale info */
673 	if (__predict_false(xfer->c_retries > 0)) {
674 		xfer->c_flags = 0;
675 		memset(&xfer->c_bio, 0, sizeof(xfer->c_bio));
676 	}
677 
678 	xfer->c_bio.blkno = bp->b_rawblkno;
679 	xfer->c_bio.bcount = bp->b_bcount;
680 	xfer->c_bio.databuf = bp->b_data;
681 	xfer->c_bio.blkdone = 0;
682 	xfer->c_bio.bp = bp;
683 
684 	/* Adjust blkno and bcount if the xfer has already been partially done */
685 	if (__predict_false(xfer->c_skip > 0)) {
686 		KASSERT(xfer->c_skip < xfer->c_bio.bcount);
687 		KASSERT((xfer->c_skip % secsize) == 0);
688 		xfer->c_bio.bcount -= xfer->c_skip;
689 		xfer->c_bio.blkno += xfer->c_skip / secsize;
690 	}
691 
692 #ifdef WD_CHAOS_MONKEY
693 	/*
694 	 * Override blkno to be over device capacity to trigger error,
695 	 * but only if it's read, to avoid trashing disk contents should
696 	 * the command be clipped, or otherwise misinterpreted, by the
697 	 * driver or controller.
698 	 */
699 	if (BUF_ISREAD(bp) && xfer->c_retries == 0 && wd->drv_chaos_freq > 0 &&
700 	    (++wd->drv_chaos_cnt % wd->drv_chaos_freq) == 0) {
701 		device_printf(dksc->sc_dev, "%s: chaos xfer %"PRIxPTR"\n",
702 		    __func__, (intptr_t)xfer & PAGE_MASK);
703 		xfer->c_bio.blkno = 7777777 + wd->sc_capacity;
704 		xfer->c_flags |= C_CHAOS;
705 	}
706 #endif
707 
708 	/*
709 	 * If we're retrying, retry in single-sector mode. This will give us
710 	 * the sector number of the problem, and will eventually allow the
711 	 * transfer to succeed. If FUA is requested, we can't actually
712 	 * do this, as ATA_SINGLE is usually executed as PIO transfer by drivers
713 	 * which support it, and that isn't compatible with NCQ/FUA.
714 	 */
715 	if (xfer->c_retries >= WDIORETRIES_SINGLE &&
716 	    (bp->b_flags & B_MEDIA_FUA) == 0)
717 		xfer->c_bio.flags = ATA_SINGLE;
718 	else
719 		xfer->c_bio.flags = 0;
720 
721 	/*
722 	 * request LBA48 transfers when supported by the controller
723 	 * and needed by transfer offset or size.
724 	 */
725 	if (wd->sc_flags & WDF_LBA48 &&
726 	    (((xfer->c_bio.blkno + xfer->c_bio.bcount / secsize) >
727 	    wd->sc_capacity28) ||
728 	    ((xfer->c_bio.bcount / secsize) > 128)))
729 		xfer->c_bio.flags |= ATA_LBA48;
730 
731 	/*
732 	 * If NCQ was negotiated, always use it for the first several attempts.
733 	 * Since the device cancels all outstanding requests on error, downgrade
734 	 * to non-NCQ on retry, so that a retried transfer that fails again
735 	 * does not cause a cascading failure for the other transfers.
736 	 * If FUA was requested, we can't downgrade, as that would violate
737 	 * the semantics - FUA would not be honored. In that case, continue
738 	 * retrying with NCQ.
739 	 */
740 	if (WD_USE_NCQ(wd) && (xfer->c_retries < WDIORETRIES_SINGLE ||
741 	    (bp->b_flags & B_MEDIA_FUA) != 0)) {
742 		xfer->c_bio.flags |= ATA_LBA48;
743 		xfer->c_flags |= C_NCQ;
744 
745 		if (WD_USE_NCQ_PRIO(wd) &&
746 		    BIO_GETPRIO(bp) == BPRIO_TIMECRITICAL)
747 			xfer->c_bio.flags |= ATA_PRIO_HIGH;
748 	}
749 
750 	if (wd->sc_flags & WDF_LBA)
751 		xfer->c_bio.flags |= ATA_LBA;
752 	if (bp->b_flags & B_READ) {
753 		xfer->c_bio.flags |= ATA_READ;
754 	} else {
755 		/* it's a write */
756 		wd->sc_flags |= WDF_DIRTY;
757 	}
758 	if (bp->b_flags & B_MEDIA_FUA) {
759 		/* If not using NCQ, the command WRITE DMA FUA EXT is LBA48 */
760 		KASSERT((wd->sc_flags & WDF_LBA48) != 0);
761 		if ((xfer->c_flags & C_NCQ) == 0)
762 			xfer->c_bio.flags |= ATA_LBA48;
763 
764 		xfer->c_bio.flags |= ATA_FUA;
765 	}
766 
767 	if (xfer->c_retries == 0)
768 		wd->inflight++;
769 	mutex_exit(&wd->sc_lock);
770 
771 	/* Queue the xfer */
772 	wd->atabus->ata_bio(wd->drvp, xfer);
773 
774 	mutex_enter(&wd->sc_lock);
775 }
776 
777 static int
778 wd_diskstart(device_t dev, struct buf *bp)
779 {
780 	struct wd_softc *wd = device_private(dev);
781 #ifdef ATADEBUG
782 	struct dk_softc *dksc = &wd->sc_dksc;
783 #endif
784 	struct ata_xfer *xfer;
785 	struct ata_channel *chp;
786 	unsigned openings;
787 	int ticks;
788 
789 	mutex_enter(&wd->sc_lock);
790 
791 	chp = wd->drvp->chnl_softc;
792 
793 	ata_channel_lock(chp);
794 	openings = ata_queue_openings(chp);
795 	ata_channel_unlock(chp);
796 
797 	openings = uimin(openings, wd->drvp->drv_openings);
798 
799 	if (wd->inflight >= openings) {
800 		/*
801 		 * pretend we run out of memory when the queue is full,
802 		 * so that the operation is retried after a minimal
803 		 * delay.
804 		 */
805 		xfer = NULL;
806 		ticks = 1;
807 	} else {
808 		/*
809 		 * Try to allocate an xfer; if no memory is available, we
810 		 * will retry later. This happens very rarely and only under
811 		 * memory pressure, so wait relatively long before retrying.
812 		 */
813 		xfer = ata_get_xfer(chp, false);
814 		ticks = hz/2;
815 	}
816 
817 	if (xfer == NULL) {
818 		ATADEBUG_PRINT(("wd_diskstart %s no xfer\n",
819 		    dksc->sc_xname), DEBUG_XFERS);
820 
821 		/*
822 		 * The disk queue is pushed automatically when an I/O
823 		 * operation finishes or another one is queued. We
824 		 * need this extra timeout because an ATA channel
825 		 * might be shared by more than one disk queue and
826 		 * all queues need to be restarted when another slot
827 		 * becomes available.
828 		 */
829 		if (!callout_pending(&wd->sc_restart_diskqueue)) {
830 			callout_reset(&wd->sc_restart_diskqueue, ticks,
831 			    wdrestart, dev);
832 		}
833 
834 		mutex_exit(&wd->sc_lock);
835 		return EAGAIN;
836 	}
837 
838 	wdstart1(wd, bp, xfer);
839 
840 	mutex_exit(&wd->sc_lock);
841 
842 	return 0;
843 }
844 
845 /*
846  * Restart queued I/O for a drive (callout handler).
847  */
848 static void
849 wdrestart(void *x)
850 {
851 	device_t self = x;
852 	struct wd_softc *wd = device_private(self);
853 	struct dk_softc *dksc = &wd->sc_dksc;
854 
855 	ATADEBUG_PRINT(("wdrestart %s\n", dksc->sc_xname),
856 	    DEBUG_XFERS);
857 
858 	if (!device_is_active(dksc->sc_dev))
859 		return;
860 
861 	dk_start(dksc, NULL);
862 }
863 
864 static void
865 wddone(device_t self, struct ata_xfer *xfer)
866 {
867 	struct wd_softc *wd = device_private(self);
868 	struct dk_softc *dksc = &wd->sc_dksc;
869 	const char *errmsg;
870 	int do_perror = 0;
871 	struct buf *bp;
872 
873 	ATADEBUG_PRINT(("wddone %s\n", dksc->sc_xname),
874 	    DEBUG_XFERS);
875 
876 	if (__predict_false(wddoingadump)) {
877 		/* just drop it to the floor */
878 		ata_free_xfer(wd->drvp->chnl_softc, xfer);
879 		return;
880 	}
881 
882 	bp = xfer->c_bio.bp;
883 	KASSERT(bp != NULL);
884 
885 	bp->b_resid = xfer->c_bio.bcount;
886 	switch (xfer->c_bio.error) {
887 	case ERR_DMA:
888 		errmsg = "DMA error";
889 		goto retry;
890 	case ERR_DF:
891 		errmsg = "device fault";
892 		goto retry;
893 	case TIMEOUT:
894 		errmsg = "device timeout";
895 		goto retry;
896 	case REQUEUE:
897 		errmsg = "requeue";
898 		goto retry2;
899 	case ERR_RESET:
900 		errmsg = "channel reset";
901 		goto retry2;
902 	case ERROR:
903 		/* Don't care about media change bits */
904 		if (xfer->c_bio.r_error != 0 &&
905 		    (xfer->c_bio.r_error & ~(WDCE_MC | WDCE_MCR)) == 0)
906 			goto noerror;
907 		errmsg = "error";
908 		do_perror = 1;
909 retry:		/* Just reset and retry. Can we do more ? */
910 		if ((xfer->c_flags & C_RECOVERED) == 0) {
911 			int wflags = (xfer->c_flags & C_POLL) ? AT_POLL : 0;
912 			ata_channel_lock(wd->drvp->chnl_softc);
913 			ata_thread_run(wd->drvp->chnl_softc, wflags,
914 			    ATACH_TH_DRIVE_RESET, wd->drvp->drive);
915 			ata_channel_unlock(wd->drvp->chnl_softc);
916 		}
917 retry2:
918 		mutex_enter(&wd->sc_lock);
919 
920 		diskerr(bp, "wd", errmsg, LOG_PRINTF,
921 		    xfer->c_bio.blkdone, dksc->sc_dkdev.dk_label);
922 		if (xfer->c_retries < WDIORETRIES)
923 			printf(", xfer %"PRIxPTR", retry %d",
924 			    (intptr_t)xfer & PAGE_MASK,
925 			    xfer->c_retries);
926 		printf("\n");
927 		if (do_perror)
928 			wdperror(wd, xfer);
929 
930 		if (xfer->c_retries < WDIORETRIES) {
931 			xfer->c_retries++;
932 
933 			/* Rerun ASAP if just requeued */
934 			if (xfer->c_bio.error == REQUEUE) {
935 				SLIST_INSERT_HEAD(&wd->sc_requeue_list, xfer,
936 				    c_retrychain);
937 				callout_reset(&wd->sc_requeue_callout,
938 				    1, wdbiorequeue, wd);
939 			} else {
940 				SLIST_INSERT_HEAD(&wd->sc_retry_list, xfer,
941 				    c_retrychain);
942 				callout_reset(&wd->sc_retry_callout,
943 				    RECOVERYTIME, wdbioretry, wd);
944 			}
945 
946 			mutex_exit(&wd->sc_lock);
947 			return;
948 		}
949 
950 		mutex_exit(&wd->sc_lock);
951 
952 #ifdef WD_SOFTBADSECT
953 		/*
954 		 * Not all errors indicate a failed block, but for those that
955 		 * do, put the block on the bad-block list for the device.
956 		 * Only do this for reads, because the drive should do it
957 		 * itself for writes, according to Manuel.
958 		 */
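		/*
		 * Error register bit 6 (0x40) is "uncorrectable data error";
		 * on pre-ATA-4 drives bit 7 (0x80) additionally meant "bad
		 * block detected", hence the 64 and 192 masks below.
		 */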
959 		if ((bp->b_flags & B_READ) &&
960 		    ((wd->drvp->ata_vers >= 4 && xfer->c_bio.r_error & 64) ||
961 		     (wd->drvp->ata_vers < 4 && xfer->c_bio.r_error & 192))) {
962 			struct disk_badsectors *dbs;
963 
964 			dbs = kmem_zalloc(sizeof *dbs, KM_NOSLEEP);
965 			if (dbs == NULL) {
966 				aprint_error_dev(dksc->sc_dev,
967 				    "failed to add bad block to list\n");
968 				goto out;
969 			}
970 
971 			dbs->dbs_min = bp->b_rawblkno;
972 			dbs->dbs_max = dbs->dbs_min +
973 			    (bp->b_bcount /wd->sc_blksize) - 1;
974 			microtime(&dbs->dbs_failedat);
975 
976 			mutex_enter(&wd->sc_lock);
977 			SLIST_INSERT_HEAD(&wd->sc_bslist, dbs, dbs_next);
978 			wd->sc_bscount++;
979 			mutex_exit(&wd->sc_lock);
980 		}
981 out:
982 #endif
983 		bp->b_error = EIO;
984 		break;
985 	case NOERROR:
986 #ifdef WD_CHAOS_MONKEY
987 		/*
988 		 * For example, Parallels AHCI emulation doesn't actually
989 		 * return an error for the invalid I/O, so just re-run
990 		 * the request and do not panic.
991 		 */
992 		if (__predict_false(xfer->c_flags & C_CHAOS)) {
993 			xfer->c_bio.error = REQUEUE;
994 			errmsg = "chaos noerror";
995 			goto retry2;
996 		}
997 #endif
998 
999 noerror:	if ((xfer->c_bio.flags & ATA_CORR) || xfer->c_retries > 0)
1000 			device_printf(dksc->sc_dev,
1001 			    "soft error (corrected) xfer %"PRIxPTR"\n",
1002 			    (intptr_t)xfer & PAGE_MASK);
1003 		break;
1004 	case ERR_NODEV:
1005 		bp->b_error = EIO;
1006 		break;
1007 	}
1008 	if (__predict_false(bp->b_error != 0) && bp->b_resid == 0) {
1009 		/*
1010 		 * The disk or controller sometimes reports a complete
1011 		 * xfer when there has in fact been an error. This is wrong;
1012 		 * assume nothing got transferred in that case.
1013 		 */
1014 		bp->b_resid = bp->b_bcount;
1015 	}
1016 
1017 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1018 
1019 	mutex_enter(&wd->sc_lock);
1020 	wd->inflight--;
1021 	mutex_exit(&wd->sc_lock);
1022 	dk_done(dksc, bp);
1023 	dk_start(dksc, NULL);
1024 }
1025 
1026 static void
1027 wdbioretry(void *v)
1028 {
1029 	struct wd_softc *wd = v;
1030 	struct ata_xfer *xfer;
1031 
1032 	ATADEBUG_PRINT(("%s %s\n", __func__, wd->sc_dksc.sc_xname),
1033 	    DEBUG_XFERS);
1034 
1035 	mutex_enter(&wd->sc_lock);
1036 	while ((xfer = SLIST_FIRST(&wd->sc_retry_list))) {
1037 		SLIST_REMOVE_HEAD(&wd->sc_retry_list, c_retrychain);
1038 		wdstart1(wd, xfer->c_bio.bp, xfer);
1039 	}
1040 	mutex_exit(&wd->sc_lock);
1041 }
1042 
1043 static void
1044 wdbiorequeue(void *v)
1045 {
1046 	struct wd_softc *wd = v;
1047 	struct ata_xfer *xfer;
1048 
1049 	ATADEBUG_PRINT(("%s %s\n", __func__, wd->sc_dksc.sc_xname),
1050 	    DEBUG_XFERS);
1051 
1052 	mutex_enter(&wd->sc_lock);
1053 	while ((xfer = SLIST_FIRST(&wd->sc_requeue_list))) {
1054 		SLIST_REMOVE_HEAD(&wd->sc_requeue_list, c_retrychain);
1055 		wdstart1(wd, xfer->c_bio.bp, xfer);
1056 	}
1057 	mutex_exit(&wd->sc_lock);
1058 }
1059 
1060 static void
1061 wdminphys(struct buf *bp)
1062 {
1063 	const struct wd_softc * const wd =
1064 	    device_lookup_private(&wd_cd, WDUNIT(bp->b_dev));
1065 	int maxsectors;
1066 
1067 	/*
1068 	 * The limit is actually 65536 for LBA48 and 256 for non-LBA48,
1069 	 * but reaching that limit requires setting the ATA command's sector
1070 	 * count to 0, which is somewhat error prone, so better to stay safe.
1071 	 */
1072 	if (wd->sc_flags & WDF_LBA48)
1073 		maxsectors = 65535;
1074 	else
1075 		maxsectors = 128;
1076 
1077 	if (bp->b_bcount > (wd->sc_blksize * maxsectors))
1078 		bp->b_bcount = (wd->sc_blksize * maxsectors);
1079 
1080 	minphys(bp);
1081 }
1082 
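/*
 * Report the maximum I/O size for this device by pushing a dummy buf
 * through wdminphys() and reading back the clipped byte count.
 */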
1083 static void
1084 wd_iosize(device_t dev, int *count)
1085 {
1086 	struct buf B;
1087 	int bmaj;
1088 
1089 	bmaj       = bdevsw_lookup_major(&wd_bdevsw);
1090 	B.b_dev    = MAKEWDDEV(bmaj,device_unit(dev),RAW_PART);
1091 	B.b_bcount = *count;
1092 
1093 	wdminphys(&B);
1094 
1095 	*count = B.b_bcount;
1096 }
1097 
1098 static int
1099 wdread(dev_t dev, struct uio *uio, int flags)
1100 {
1101 
1102 	ATADEBUG_PRINT(("wdread\n"), DEBUG_XFERS);
1103 	return (physio(wdstrategy, NULL, dev, B_READ, wdminphys, uio));
1104 }
1105 
1106 static int
1107 wdwrite(dev_t dev, struct uio *uio, int flags)
1108 {
1109 
1110 	ATADEBUG_PRINT(("wdwrite\n"), DEBUG_XFERS);
1111 	return (physio(wdstrategy, NULL, dev, B_WRITE, wdminphys, uio));
1112 }
1113 
1114 static int
1115 wdopen(dev_t dev, int flag, int fmt, struct lwp *l)
1116 {
1117 	struct wd_softc *wd;
1118 	struct dk_softc *dksc;
1119 	int unit, part, error;
1120 
1121 	ATADEBUG_PRINT(("wdopen\n"), DEBUG_FUNCS);
1122 	unit = WDUNIT(dev);
1123 	wd = device_lookup_private(&wd_cd, unit);
1124 	if (wd == NULL)
1125 		return (ENXIO);
1126 	dksc = &wd->sc_dksc;
1127 
1128 	if (! device_is_active(dksc->sc_dev))
1129 		return (ENODEV);
1130 
1131 	part = WDPART(dev);
1132 
1133 	if (wd->sc_capacity == 0)
1134 		return (ENODEV);
1135 
1136 	/*
1137 	 * If any partition is open, but the disk has been invalidated,
1138 	 * disallow further opens.
1139 	 */
1140 	if ((wd->sc_flags & (WDF_OPEN | WDF_LOADED)) == WDF_OPEN) {
1141 		if (part != RAW_PART || fmt != S_IFCHR)
1142 			return EIO;
1143 	}
1144 
1145 	error = dk_open(dksc, dev, flag, fmt, l);
1146 
1147 	return error;
1148 }
1149 
1150 /*
1151  * Serialized by caller
1152  */
1153 static int
1154 wd_firstopen(device_t self, dev_t dev, int flag, int fmt)
1155 {
1156 	struct wd_softc *wd = device_private(self);
1157 	struct dk_softc *dksc = &wd->sc_dksc;
1158 	int error;
1159 
1160 	error = wd->atabus->ata_addref(wd->drvp);
1161 	if (error)
1162 		return error;
1163 
1164 	if ((wd->sc_flags & WDF_LOADED) == 0) {
1165 		int param_error;
1166 
1167 		/* Load the physical device parameters. */
1168 		param_error = wd_get_params(wd, &wd->sc_params);
1169 		if (param_error != 0) {
1170 			aprint_error_dev(dksc->sc_dev, "IDENTIFY failed\n");
1171 			error = EIO;
1172 			goto bad;
1173 		}
1174 		wd_set_geometry(wd);
1175 		wd->sc_flags |= WDF_LOADED;
1176 	}
1177 
1178 	wd->sc_flags |= WDF_OPEN;
1179 	return 0;
1180 
1181 bad:
1182 	wd->atabus->ata_delref(wd->drvp);
1183 	return error;
1184 }
1185 
1186 /*
1187  * Caller must hold wd->sc_dk.dk_openlock.
1188  */
1189 static int
1190 wd_lastclose(device_t self)
1191 {
1192 	struct wd_softc *wd = device_private(self);
1193 
1194 	KASSERTMSG(bufq_peek(wd->sc_dksc.sc_bufq) == NULL, "bufq not empty");
1195 
1196 	if (wd->sc_flags & WDF_DIRTY)
1197 		wd_flushcache(wd, AT_WAIT);
1198 
1199 	wd->atabus->ata_delref(wd->drvp);
1200 	wd->sc_flags &= ~WDF_OPEN;
1201 
1202 	return 0;
1203 }
1204 
1205 static int
1206 wdclose(dev_t dev, int flag, int fmt, struct lwp *l)
1207 {
1208 	struct wd_softc *wd;
1209 	struct dk_softc *dksc;
1210 	int unit;
1211 
1212 	unit = WDUNIT(dev);
1213 	wd = device_lookup_private(&wd_cd, unit);
1214 	dksc = &wd->sc_dksc;
1215 
1216 	return dk_close(dksc, dev, flag, fmt, l);
1217 }
1218 
1219 void
1220 wdperror(const struct wd_softc *wd, struct ata_xfer *xfer)
1221 {
1222 	static const char *const errstr0_3[] = {"address mark not found",
1223 	    "track 0 not found", "aborted command", "media change requested",
1224 	    "id not found", "media changed", "uncorrectable data error",
1225 	    "bad block detected"};
1226 	static const char *const errstr4_5[] = {
1227 	    "obsolete (address mark not found)",
1228 	    "no media/write protected", "aborted command",
1229 	    "media change requested", "id not found", "media changed",
1230 	    "uncorrectable data error", "interface CRC error"};
1231 	const char *const *errstr;
1232 	int i;
1233 	const char *sep = "";
1234 
1235 	const struct dk_softc *dksc = &wd->sc_dksc;
1236 	const char *devname = dksc->sc_xname;
1237 	struct ata_drive_datas *drvp = wd->drvp;
1238 	int errno = xfer->c_bio.r_error;
1239 
1240 	if (drvp->ata_vers >= 4)
1241 		errstr = errstr4_5;
1242 	else
1243 		errstr = errstr0_3;
1244 
1245 	printf("%s: (", devname);
1246 
1247 	if (errno == 0)
1248 		printf("error not notified");
1249 
1250 	for (i = 0; i < 8; i++) {
1251 		if (errno & (1 << i)) {
1252 			printf("%s%s", sep, errstr[i]);
1253 			sep = ", ";
1254 		}
1255 	}
1256 	printf(")\n");
1257 }
1258 
1259 int
1260 wdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
1261 {
1262 	struct wd_softc *wd =
1263 	    device_lookup_private(&wd_cd, WDUNIT(dev));
1264 	struct dk_softc *dksc = &wd->sc_dksc;
1265 
1266 	ATADEBUG_PRINT(("wdioctl\n"), DEBUG_FUNCS);
1267 
1268 	if ((wd->sc_flags & WDF_LOADED) == 0)
1269 		return EIO;
1270 
1271 	switch (cmd) {
1272 #ifdef HAS_BAD144_HANDLING
1273 	case DIOCSBAD:
1274 		if ((flag & FWRITE) == 0)
1275 			return EBADF;
1276 		dksc->sc_dkdev.dk_cpulabel->bad = *(struct dkbad *)addr;
1277 		dksc->sc_dkdev.dk_label->d_flags |= D_BADSECT;
1278 		bad144intern(wd);
1279 		return 0;
1280 #endif
1281 #ifdef WD_SOFTBADSECT
1282 	case DIOCBSLIST :
1283 	{
1284 		uint32_t count, missing, skip;
1285 		struct disk_badsecinfo dbsi;
1286 		struct disk_badsectors *dbs;
1287 		size_t available;
1288 		uint8_t *laddr;
1289 
1290 		dbsi = *(struct disk_badsecinfo *)addr;
1291 		missing = wd->sc_bscount;
1292 		count = 0;
1293 		available = dbsi.dbsi_bufsize;
1294 		skip = dbsi.dbsi_skip;
1295 		laddr = (uint8_t *)dbsi.dbsi_buffer;
1296 
1297 		/*
1298 		 * We start this loop with the expectation that all of the
1299 		 * entries will be missed and decrement this counter each
1300 		 * time we either skip over one (already copied out) or
1301 		 * we actually copy it back to user space.  The structs
1302 		 * holding the bad sector information are copied directly
1303 		 * back to user space whilst the summary is returned via
1304 		 * the struct passed in via the ioctl.
1305 		 */
1306 		mutex_enter(&wd->sc_lock);
1307 		SLIST_FOREACH(dbs, &wd->sc_bslist, dbs_next) {
1308 			if (skip > 0) {
1309 				missing--;
1310 				skip--;
1311 				continue;
1312 			}
1313 			if (available < sizeof(*dbs))
1314 				break;
1315 			available -= sizeof(*dbs);
1316 			copyout(dbs, laddr, sizeof(*dbs));
1317 			laddr += sizeof(*dbs);
1318 			missing--;
1319 			count++;
1320 		}
1321 		mutex_exit(&wd->sc_lock);
1322 		dbsi.dbsi_left = missing;
1323 		dbsi.dbsi_copied = count;
1324 		*(struct disk_badsecinfo *)addr = dbsi;
1325 		return 0;
1326 	}
1327 
1328 	case DIOCBSFLUSH :
1329 		/* Clean out the bad sector list */
1330 		mutex_enter(&wd->sc_lock);
1331 		while (!SLIST_EMPTY(&wd->sc_bslist)) {
1332 			struct disk_badsectors *dbs =
1333 			    SLIST_FIRST(&wd->sc_bslist);
1334 			SLIST_REMOVE_HEAD(&wd->sc_bslist, dbs_next);
1335 			kmem_free(dbs, sizeof(*dbs));
1336 		}
1337 		mutex_exit(&wd->sc_lock);
1338 		wd->sc_bscount = 0;
1339 		return 0;
1340 #endif
1341 
1342 #ifdef notyet
1343 	case DIOCWFORMAT:
1344 		if ((flag & FWRITE) == 0)
1345 			return EBADF;
1346 		{
1347 		register struct format_op *fop;
1348 		struct iovec aiov;
1349 		struct uio auio;
1350 		int error1;
1351 
1352 		fop = (struct format_op *)addr;
1353 		aiov.iov_base = fop->df_buf;
1354 		aiov.iov_len = fop->df_count;
1355 		auio.uio_iov = &aiov;
1356 		auio.uio_iovcnt = 1;
1357 		auio.uio_resid = fop->df_count;
1358 		auio.uio_offset =
1359 			fop->df_startblk * wd->sc_dk.dk_label->d_secsize;
1360 		auio.uio_vmspace = l->l_proc->p_vmspace;
1361 		error1 = physio(wdformat, NULL, dev, B_WRITE, wdminphys,
1362 		    &auio);
1363 		fop->df_count -= auio.uio_resid;
1364 		fop->df_reg[0] = wdc->sc_status;
1365 		fop->df_reg[1] = wdc->sc_error;
1366 		return error1;
1367 		}
1368 #endif
1369 	case DIOCGCACHE:
1370 		return wd_getcache(wd, (int *)addr);
1371 
1372 	case DIOCSCACHE:
1373 		return wd_setcache(wd, *(int *)addr);
1374 
1375 	case DIOCCACHESYNC:
1376 		return wd_flushcache(wd, AT_WAIT);
1377 
1378 	case ATAIOCCOMMAND:
1379 		/*
1380 		 * Make sure this command is (relatively) safe first
1381 		 */
1382 		if ((((atareq_t *) addr)->flags & ATACMD_READ) == 0 &&
1383 		    (flag & FWRITE) == 0)
1384 			return (EBADF);
1385 		{
1386 		struct wd_ioctl *wi;
1387 		atareq_t *atareq = (atareq_t *) addr;
1388 		int error1;
1389 
1390 		wi = wi_get(wd);
1391 		wi->wi_atareq = *atareq;
1392 
1393 		if (atareq->datalen && atareq->flags &
1394 		    (ATACMD_READ | ATACMD_WRITE)) {
1395 			void *tbuf;
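			/*
			 * IDENTIFY always transfers a full sector; if the
			 * caller asked for less, bounce the data through a
			 * zeroed kernel buffer and copy out only the
			 * requested amount afterwards.
			 */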
1396 			if (atareq->datalen < DEV_BSIZE
1397 			    && atareq->command == WDCC_IDENTIFY) {
1398 				tbuf = kmem_zalloc(DEV_BSIZE, KM_SLEEP);
1399 				wi->wi_iov.iov_base = tbuf;
1400 				wi->wi_iov.iov_len = DEV_BSIZE;
1401 				UIO_SETUP_SYSSPACE(&wi->wi_uio);
1402 			} else {
1403 				tbuf = NULL;
1404 				wi->wi_iov.iov_base = atareq->databuf;
1405 				wi->wi_iov.iov_len = atareq->datalen;
1406 				wi->wi_uio.uio_vmspace = l->l_proc->p_vmspace;
1407 			}
1408 			wi->wi_uio.uio_iov = &wi->wi_iov;
1409 			wi->wi_uio.uio_iovcnt = 1;
1410 			wi->wi_uio.uio_resid = atareq->datalen;
1411 			wi->wi_uio.uio_offset = 0;
1412 			wi->wi_uio.uio_rw =
1413 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE;
1414 			error1 = physio(wdioctlstrategy, &wi->wi_bp, dev,
1415 			    (atareq->flags & ATACMD_READ) ? B_READ : B_WRITE,
1416 			    wdminphys, &wi->wi_uio);
1417 			if (tbuf != NULL && error1 == 0) {
1418 				error1 = copyout(tbuf, atareq->databuf,
1419 				    atareq->datalen);
1420 				kmem_free(tbuf, DEV_BSIZE);
1421 			}
1422 		} else {
1423 			/* No need to call physio if we don't have any
1424 			   user data */
1425 			wi->wi_bp.b_flags = 0;
1426 			wi->wi_bp.b_data = 0;
1427 			wi->wi_bp.b_bcount = 0;
1428 			wi->wi_bp.b_dev = dev;
1429 			wi->wi_bp.b_proc = l->l_proc;
1430 			wdioctlstrategy(&wi->wi_bp);
1431 			error1 = wi->wi_bp.b_error;
1432 		}
1433 		*atareq = wi->wi_atareq;
1434 		wi_free(wi);
1435 		return(error1);
1436 		}
1437 
1438 	case DIOCGSECTORALIGN: {
1439 		struct disk_sectoralign *dsa = addr;
1440 		int part = WDPART(dev);
1441 
1442 		*dsa = wd->sc_sectoralign;
1443 		if (part != RAW_PART) {
1444 			struct disklabel *lp = dksc->sc_dkdev.dk_label;
1445 			daddr_t offset = lp->d_partitions[part].p_offset;
1446 			uint32_t r = offset % dsa->dsa_alignment;
1447 
1448 			if (r < dsa->dsa_firstaligned)
1449 				dsa->dsa_firstaligned = dsa->dsa_firstaligned
1450 				    - r;
1451 			else
1452 				dsa->dsa_firstaligned = (dsa->dsa_firstaligned
1453 				    + dsa->dsa_alignment) - r;
1454 		}
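		/*
		 * For example (a sketch): with an alignment of 8, a disk whose
		 * first aligned LBA is 0, and a partition starting at absolute
		 * sector 63, r == 7 and the partition-relative first aligned
		 * sector becomes (0 + 8) - 7 == 1, i.e. absolute sector 64.
		 */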
1455 
1456 		return 0;
1457 	}
1458 
1459 	default:
1460 		return dk_ioctl(dksc, dev, cmd, addr, flag, l);
1461 	}
1462 
1463 #ifdef DIAGNOSTIC
1464 	panic("wdioctl: impossible");
1465 #endif
1466 }
1467 
1468 static int
1469 wd_discard(device_t dev, off_t pos, off_t len)
1470 {
1471 	struct wd_softc *wd = device_private(dev);
1472 	daddr_t bno;
1473 	long size, done;
1474 	long maxatonce, amount;
1475 	int result;
1476 
1477 	if (!(wd->sc_params.atap_ata_major & WDC_VER_ATA7)
1478 	    || !(wd->sc_params.support_dsm & ATA_SUPPORT_DSM_TRIM)) {
1479 		/* not supported; ignore request */
1480 		ATADEBUG_PRINT(("wddiscard (unsupported)\n"), DEBUG_FUNCS);
1481 		return 0;
1482 	}
1483 	maxatonce = 0xffff; /*wd->sc_params.max_dsm_blocks*/
1484 
1485 	ATADEBUG_PRINT(("wddiscard\n"), DEBUG_FUNCS);
1486 
1487 	if ((wd->sc_flags & WDF_LOADED) == 0)
1488 		return EIO;
1489 
1490 	/* round the start up and the end down */
1491 	bno = (pos + wd->sc_blksize - 1) / wd->sc_blksize;
1492 	size = ((pos + len) / wd->sc_blksize) - bno;
1493 
1494 	done = 0;
1495 	while (done < size) {
1496 	     amount = size - done;
1497 	     if (amount > maxatonce) {
1498 		     amount = maxatonce;
1499 	     }
1500 	     result = wd_trim(wd, bno + done, amount);
1501 	     if (result) {
1502 		     return result;
1503 	     }
1504 	     done += amount;
1505 	}
1506 	return 0;
1507 }
1508 
1509 static int
1510 wddiscard(dev_t dev, off_t pos, off_t len)
1511 {
1512 	struct wd_softc *wd;
1513 	struct dk_softc *dksc;
1514 	int unit;
1515 
1516 	unit = WDUNIT(dev);
1517 	wd = device_lookup_private(&wd_cd, unit);
1518 	dksc = &wd->sc_dksc;
1519 
1520 	return dk_discard(dksc, dev, pos, len);
1521 }
1522 
1523 #ifdef B_FORMAT
1524 int
1525 wdformat(struct buf *bp)
1526 {
1527 
1528 	bp->b_flags |= B_FORMAT;
1529 	return wdstrategy(bp);
1530 }
1531 #endif
1532 
1533 int
1534 wdsize(dev_t dev)
1535 {
1536 	struct wd_softc *wd;
1537 	struct dk_softc *dksc;
1538 	int unit;
1539 
1540 	ATADEBUG_PRINT(("wdsize\n"), DEBUG_FUNCS);
1541 
1542 	unit = WDUNIT(dev);
1543 	wd = device_lookup_private(&wd_cd, unit);
1544 	if (wd == NULL)
1545 		return (-1);
1546 	dksc = &wd->sc_dksc;
1547 
1548 	if (!device_is_active(dksc->sc_dev))
1549 		return (-1);
1550 
1551 	return dk_size(dksc, dev);
1552 }
1553 
1554 /*
1555  * Dump core after a system crash.
1556  */
1557 static int
1558 wddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1559 {
1560 	struct wd_softc *wd;
1561 	struct dk_softc *dksc;
1562 	int unit;
1563 
1564 	/* Check if recursive dump; if so, punt. */
1565 	if (wddoingadump)
1566 		return EFAULT;
1567 	wddoingadump = 1;
1568 
1569 	unit = WDUNIT(dev);
1570 	wd = device_lookup_private(&wd_cd, unit);
1571 	if (wd == NULL)
1572 		return (ENXIO);
1573 	dksc = &wd->sc_dksc;
1574 
1575 	return dk_dump(dksc, dev, blkno, va, size, 0);
1576 }
1577 
1578 static int
1579 wd_dumpblocks(device_t dev, void *va, daddr_t blkno, int nblk)
1580 {
1581 	struct wd_softc *wd = device_private(dev);
1582 	struct dk_softc *dksc = &wd->sc_dksc;
1583 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1584 	struct ata_xfer *xfer = &wd->dump_xfer;
1585 	int err;
1586 
1587 	/* Recalibrate, if first dump transfer. */
1588 	if (wddumprecalibrated == 0) {
1589 		wddumprecalibrated = 1;
1590 		ata_channel_lock(wd->drvp->chnl_softc);
1591 		/* This will directly execute the reset due to AT_POLL */
1592 		ata_thread_run(wd->drvp->chnl_softc, AT_POLL,
1593 		    ATACH_TH_DRIVE_RESET, wd->drvp->drive);
1594 
1595 		wd->drvp->state = RESET;
1596 		ata_channel_unlock(wd->drvp->chnl_softc);
1597 	}
1598 
1599 	memset(xfer, 0, sizeof(*xfer));
1600 	xfer->c_flags |= C_PRIVATE_ALLOC | C_SKIP_QUEUE;
1601 
1602 	xfer->c_bio.blkno = blkno;
1603 	xfer->c_bio.flags = ATA_POLL;
1604 	if (wd->sc_flags & WDF_LBA48 &&
1605 	    (xfer->c_bio.blkno + nblk) > wd->sc_capacity28)
1606 		xfer->c_bio.flags |= ATA_LBA48;
1607 	if (wd->sc_flags & WDF_LBA)
1608 		xfer->c_bio.flags |= ATA_LBA;
1609 	xfer->c_bio.bcount = nblk * dg->dg_secsize;
1610 	xfer->c_bio.databuf = va;
1611 #ifndef WD_DUMP_NOT_TRUSTED
1612 	/* This will poll until the bio is complete */
1613 	wd->atabus->ata_bio(wd->drvp, xfer);
1614 
1615 	switch(err = xfer->c_bio.error) {
1616 	case TIMEOUT:
1617 		printf("wddump: device timed out");
1618 		err = EIO;
1619 		break;
1620 	case ERR_DF:
1621 		printf("wddump: drive fault");
1622 		err = EIO;
1623 		break;
1624 	case ERR_DMA:
1625 		printf("wddump: DMA error");
1626 		err = EIO;
1627 		break;
1628 	case ERROR:
1629 		printf("wddump: ");
1630 		wdperror(wd, xfer);
1631 		err = EIO;
1632 		break;
1633 	case NOERROR:
1634 		err = 0;
1635 		break;
1636 	default:
1637 		panic("wddump: unknown error type %x", err);
1638 	}
1639 
1640 	if (err != 0) {
1641 		printf("\n");
1642 		return err;
1643 	}
1644 #else	/* WD_DUMP_NOT_TRUSTED */
1645 	/* Let's just talk about this first... */
1646 	printf("wd%d: dump addr 0x%x, cylin %d, head %d, sector %d\n",
1647 	    unit, va, cylin, head, sector);
1648 	delay(500 * 1000);	/* half a second */
1649 #endif
1650 
1651 	wddoingadump = 0;
1652 	return 0;
1653 }
1654 
1655 #ifdef HAS_BAD144_HANDLING
1656 /*
1657  * Internalize the bad sector table.
1658  */
1659 void
1660 bad144intern(struct wd_softc *wd)
1661 {
1662 	struct dk_softc *dksc = &wd->sc_dksc;
1663 	struct dkbad *bt = &dksc->sc_dkdev.dk_cpulabel->bad;
1664 	struct disklabel *lp = dksc->sc_dkdev.dk_label;
1665 	int i = 0;
1666 
1667 	ATADEBUG_PRINT(("bad144intern\n"), DEBUG_XFERS);
1668 
1669 	for (; i < NBT_BAD; i++) {
1670 		if (bt->bt_bad[i].bt_cyl == 0xffff)
1671 			break;
1672 		wd->drvp->badsect[i] =
1673 		    bt->bt_bad[i].bt_cyl * lp->d_secpercyl +
1674 		    (bt->bt_bad[i].bt_trksec >> 8) * lp->d_nsectors +
1675 		    (bt->bt_bad[i].bt_trksec & 0xff);
1676 	}
1677 	for (; i < NBT_BAD+1; i++)
1678 		wd->drvp->badsect[i] = -1;
1679 }
1680 #endif
1681 
1682 static void
1683 wd_set_geometry(struct wd_softc *wd)
1684 {
1685 	struct dk_softc *dksc = &wd->sc_dksc;
1686 	struct disk_geom *dg = &dksc->sc_dkdev.dk_geom;
1687 
1688 	memset(dg, 0, sizeof(*dg));
1689 
1690 	dg->dg_secperunit = wd->sc_capacity;
1691 	dg->dg_secsize = wd->sc_blksize;
1692 	dg->dg_nsectors = wd->sc_params.atap_sectors;
1693 	dg->dg_ntracks = wd->sc_params.atap_heads;
1694 	if ((wd->sc_flags & WDF_LBA) == 0)
1695 		dg->dg_ncylinders = wd->sc_params.atap_cylinders;
1696 
1697 	disk_set_info(dksc->sc_dev, &dksc->sc_dkdev, wd->sc_typename);
1698 }
1699 
1700 int
1701 wd_get_params(struct wd_softc *wd, struct ataparams *params)
1702 {
1703 	int retry = 0;
1704 	struct ata_channel *chp = wd->drvp->chnl_softc;
1705 	const int flags = AT_WAIT;
1706 
1707 again:
1708 	switch (wd->atabus->ata_get_params(wd->drvp, flags, params)) {
1709 	case CMD_AGAIN:
1710 		return 1;
1711 	case CMD_ERR:
1712 		if (retry == 0) {
1713 			retry++;
1714 			ata_channel_lock(chp);
1715 			(*wd->atabus->ata_reset_drive)(wd->drvp, flags, NULL);
1716 			ata_channel_unlock(chp);
1717 			goto again;
1718 		}
1719 
1720 		if (wd->drvp->drive_type != ATA_DRIVET_OLD)
1721 			return 1;
1722 		/*
1723 		 * We `know' there's a drive here; just assume it's old.
1724 		 * This geometry is only used to read the MBR and print a
1725 		 * (false) attach message.
1726 		 */
1727 		strncpy(params->atap_model, "ST506",
1728 		    sizeof params->atap_model);
1729 		params->atap_config = ATA_CFG_FIXED;
1730 		params->atap_cylinders = 1024;
1731 		params->atap_heads = 8;
1732 		params->atap_sectors = 17;
1733 		params->atap_multi = 1;
1734 		params->atap_capabilities1 = params->atap_capabilities2 = 0;
1735 		wd->drvp->ata_vers = -1; /* Mark it as pre-ATA */
1736 		/* FALLTHROUGH */
1737 	case CMD_OK:
1738 		return 0;
1739 	default:
1740 		panic("wd_get_params: bad return code from ata_get_params");
1741 		/* NOTREACHED */
1742 	}
1743 }
1744 
1745 int
1746 wd_getcache(struct wd_softc *wd, int *bitsp)
1747 {
1748 	struct ataparams params;
1749 
1750 	if (wd_get_params(wd, &params) != 0)
1751 		return EIO;
1752 	if (params.atap_cmd_set1 == 0x0000 ||
1753 	    params.atap_cmd_set1 == 0xffff ||
1754 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0) {
1755 		*bitsp = 0;
1756 		return 0;
1757 	}
1758 	*bitsp = DKCACHE_WCHANGE | DKCACHE_READ;
1759 	if (params.atap_cmd1_en & WDC_CMD1_CACHE)
1760 		*bitsp |= DKCACHE_WRITE;
1761 
1762 	if (WD_USE_NCQ(wd) || (wd->drvp->drive_flags & ATA_DRIVE_WFUA))
1763 		*bitsp |= DKCACHE_FUA;
1764 
1765 	return 0;
1766 }
1767 
1768 
1769 static int
1770 wd_check_error(const struct dk_softc *dksc, const struct ata_xfer *xfer,
1771     const char *func)
1772 {
1773 	static const char at_errbits[] = "\20\10ERROR\11TIMEOU\12DF";
1774 
1775 	int flags = xfer->c_ata_c.flags;
1776 
1777 	if ((flags & AT_ERROR) != 0 && xfer->c_ata_c.r_error == WDCE_ABRT) {
1778 		/* command not supported */
1779 		aprint_debug_dev(dksc->sc_dev, "%s: not supported\n", func);
1780 		return ENODEV;
1781 	}
1782 	if (flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
1783 		char sbuf[sizeof(at_errbits) + 64];
1784 		snprintb(sbuf, sizeof(sbuf), at_errbits, flags);
1785 		aprint_error_dev(dksc->sc_dev, "%s: status=%s\n", func, sbuf);
1786 		return EIO;
1787 	}
1788 	return 0;
1789 }
1790 
1791 int
1792 wd_setcache(struct wd_softc *wd, int bits)
1793 {
1794 	struct dk_softc *dksc = &wd->sc_dksc;
1795 	struct ataparams params;
1796 	struct ata_xfer *xfer;
1797 	int error;
1798 
1799 	if (wd_get_params(wd, &params) != 0)
1800 		return EIO;
1801 
1802 	if (params.atap_cmd_set1 == 0x0000 ||
1803 	    params.atap_cmd_set1 == 0xffff ||
1804 	    (params.atap_cmd_set1 & WDC_CMD1_CACHE) == 0)
1805 		return EOPNOTSUPP;
1806 
1807 	if ((bits & DKCACHE_READ) == 0 ||
1808 	    (bits & DKCACHE_SAVE) != 0)
1809 		return EOPNOTSUPP;
1810 
1811 	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);
1812 
1813 	xfer->c_ata_c.r_command = SET_FEATURES;
1814 	xfer->c_ata_c.r_st_bmask = 0;
1815 	xfer->c_ata_c.r_st_pmask = 0;
1816 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1817 	xfer->c_ata_c.flags = AT_WAIT;
1818 	if (bits & DKCACHE_WRITE)
1819 		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_EN;
1820 	else
1821 		xfer->c_ata_c.r_features = WDSF_WRITE_CACHE_DS;
1822 
1823 	wd->atabus->ata_exec_command(wd->drvp, xfer);
1824 	ata_wait_cmd(wd->drvp->chnl_softc, xfer);
1825 
1826 	error = wd_check_error(dksc, xfer, __func__);
1827 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1828 	return error;
1829 }
1830 
1831 static int
1832 wd_standby(struct wd_softc *wd, int flags)
1833 {
1834 	struct dk_softc *dksc = &wd->sc_dksc;
1835 	struct ata_xfer *xfer;
1836 	int error;
1837 
1838 	aprint_debug_dev(dksc->sc_dev, "standby immediate\n");
1839 	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);
1840 
1841 	xfer->c_ata_c.r_command = WDCC_STANDBY_IMMED;
1842 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1843 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1844 	xfer->c_ata_c.flags = flags;
1845 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1846 
1847 	wd->atabus->ata_exec_command(wd->drvp, xfer);
1848 	ata_wait_cmd(wd->drvp->chnl_softc, xfer);
1849 
1850 	error = wd_check_error(dksc, xfer, __func__);
1851 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1852 	return error;
1853 }
1854 
1855 int
1856 wd_flushcache(struct wd_softc *wd, int flags)
1857 {
1858 	struct dk_softc *dksc = &wd->sc_dksc;
1859 	struct ata_xfer *xfer;
1860 	int error;
1861 
1862 	/*
1863 	 * WDCC_FLUSHCACHE has been part of the command set since ATA-4,
1864 	 * but some drives report only ATA-2 and still support it.
1865 	 */
1866 	if (wd->drvp->ata_vers < 4 &&
1867 	    ((wd->sc_params.atap_cmd_set2 & WDC_CMD2_FC) == 0 ||
1868 	    wd->sc_params.atap_cmd_set2 == 0xffff))
1869 		return ENODEV;
1870 
1871 	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);
1872 
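	/* Prefer FLUSH CACHE EXT when both LBA48 and the EXT command are enabled. */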
1873 	if ((wd->sc_params.atap_cmd2_en & ATA_CMD2_LBA48) != 0 &&
1874 	    (wd->sc_params.atap_cmd2_en & ATA_CMD2_FCE) != 0) {
1875 		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE_EXT;
1876 		flags |= AT_LBA48;
1877 	} else
1878 		xfer->c_ata_c.r_command = WDCC_FLUSHCACHE;
1879 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1880 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1881 	xfer->c_ata_c.flags = flags | AT_READREG;
1882 	xfer->c_ata_c.timeout = 300000; /* 5m timeout */
1883 
1884 	wd->atabus->ata_exec_command(wd->drvp, xfer);
1885 	ata_wait_cmd(wd->drvp->chnl_softc, xfer);
1886 
1887 	error = wd_check_error(dksc, xfer, __func__);
1888 	wd->sc_flags &= ~WDF_DIRTY;
1889 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1890 	return error;
1891 }
1892 
1893 /*
1894  * Execute TRIM command, assumes sleep context.
1895  */
1896 static int
1897 wd_trim(struct wd_softc *wd, daddr_t bno, long size)
1898 {
1899 	struct dk_softc *dksc = &wd->sc_dksc;
1900 	struct ata_xfer *xfer;
1901 	int error;
1902 	unsigned char *req;
1903 
1904 	xfer = ata_get_xfer(wd->drvp->chnl_softc, true);
1905 
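	/*
	 * Build a single DSM TRIM range entry in the 512-byte payload:
	 * bytes 0-5 are the starting LBA (48 bits, little-endian),
	 * bytes 6-7 are the number of sectors to trim.
	 */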
1906 	req = kmem_zalloc(512, KM_SLEEP);
1907 	req[0] = bno & 0xff;
1908 	req[1] = (bno >> 8) & 0xff;
1909 	req[2] = (bno >> 16) & 0xff;
1910 	req[3] = (bno >> 24) & 0xff;
1911 	req[4] = (bno >> 32) & 0xff;
1912 	req[5] = (bno >> 40) & 0xff;
1913 	req[6] = size & 0xff;
1914 	req[7] = (size >> 8) & 0xff;
1915 
1916 	/*
1917 	 * XXX We could possibly use NCQ TRIM, which supports executing
1918 	 * this command concurrently.  That would need some investigation;
1919 	 * some early (and not so early) disk firmware caused data loss
1920 	 * with NCQ TRIM.  atastart() et al. would also need to be adjusted
1921 	 * to allow running several non-I/O ATA commands in parallel.
1922 	 */
1923 
1924 	xfer->c_ata_c.r_command = ATA_DATA_SET_MANAGEMENT;
1925 	xfer->c_ata_c.r_count = 1;
1926 	xfer->c_ata_c.r_features = ATA_SUPPORT_DSM_TRIM;
1927 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
1928 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
1929 	xfer->c_ata_c.timeout = 30000; /* 30s timeout */
1930 	xfer->c_ata_c.data = req;
1931 	xfer->c_ata_c.bcount = 512;
1932 	xfer->c_ata_c.flags |= AT_WRITE | AT_WAIT;
1933 
1934 	wd->atabus->ata_exec_command(wd->drvp, xfer);
1935 	ata_wait_cmd(wd->drvp->chnl_softc, xfer);
1936 
1937 	kmem_free(req, 512);
1938 	error = wd_check_error(dksc, xfer, __func__);
1939 	ata_free_xfer(wd->drvp->chnl_softc, xfer);
1940 	return error;
1941 }
1942 
1943 bool
1944 wd_shutdown(device_t dev, int how)
1945 {
1946 	struct wd_softc *wd = device_private(dev);
1947 
1948 	/* the adapter needs to be enabled */
1949 	if (wd->atabus->ata_addref(wd->drvp))
1950 		return true; /* no need to complain */
1951 
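	/* Flush the write cache; when powering down, also spin the disk down. */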
1952 	wd_flushcache(wd, AT_POLL);
1953 	if ((how & RB_POWERDOWN) == RB_POWERDOWN)
1954 		wd_standby(wd, AT_POLL);
1955 	return true;
1956 }
1957 
1958 /*
1959  * Allocate space for an ioctl queue structure.  Mostly taken from
1960  * scsipi_ioctl.c
1961  */
1962 struct wd_ioctl *
1963 wi_get(struct wd_softc *wd)
1964 {
1965 	struct wd_ioctl *wi;
1966 
1967 	wi = kmem_zalloc(sizeof(struct wd_ioctl), KM_SLEEP);
1968 	wi->wi_softc = wd;
1969 	buf_init(&wi->wi_bp);
1970 
1971 	return (wi);
1972 }
1973 
1974 /*
1975  * Free an ioctl structure and remove it from our list
1976  */
1977 
1978 void
1979 wi_free(struct wd_ioctl *wi)
1980 {
1981 	buf_destroy(&wi->wi_bp);
1982 	kmem_free(wi, sizeof(*wi));
1983 }
1984 
1985 /*
1986  * Find a wd_ioctl structure based on the struct buf.
1987  */
1988 
1989 struct wd_ioctl *
1990 wi_find(struct buf *bp)
1991 {
1992 	return container_of(bp, struct wd_ioctl, wi_bp);
1993 }
1994 
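/*
 * Data transfer (read/write) commands move data in multiples of the
 * drive's logical sector size; other data-carrying commands (IDENTIFY,
 * SMART, ...) use fixed 512-byte structures, hence the default.
 */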
1995 static uint
1996 wi_sector_size(const struct wd_ioctl * const wi)
1997 {
1998 	switch (wi->wi_atareq.command) {
1999 	case WDCC_READ:
2000 	case WDCC_WRITE:
2001 	case WDCC_READMULTI:
2002 	case WDCC_WRITEMULTI:
2003 	case WDCC_READDMA:
2004 	case WDCC_WRITEDMA:
2005 	case WDCC_READ_EXT:
2006 	case WDCC_WRITE_EXT:
2007 	case WDCC_READMULTI_EXT:
2008 	case WDCC_WRITEMULTI_EXT:
2009 	case WDCC_READDMA_EXT:
2010 	case WDCC_WRITEDMA_EXT:
2011 	case WDCC_READ_FPDMA_QUEUED:
2012 	case WDCC_WRITE_FPDMA_QUEUED:
2013 		return wi->wi_softc->sc_blksize;
2014 	default:
2015 		return 512;
2016 	}
2017 }
2018 
2019 /*
2020  * Ioctl pseudo strategy routine
2021  *
2022  * This is mostly stolen from scsipi_ioctl.c:scsistrategy().  What
2023  * happens here is:
2024  *
2025  * - wdioctl() queues a wd_ioctl structure.
2026  *
2027  * - wdioctl() calls physio/wdioctlstrategy based on whether or not
2028  *   user space I/O is required.  If physio() is called, physio() eventually
2029  *   calls wdioctlstrategy().
2030  *
2031  * - In either case, wdioctlstrategy() calls wd->atabus->ata_exec_command()
2032  *   to perform the actual command.
2033  *
2034  * The reason for using the pseudo strategy routine is that
2035  * when doing I/O to/from user space, physio _really_ wants to be in
2036  * the loop.  We could put the entire buffer into the ioctl request
2037  * structure, but that won't scale if we want to do things like download
2038  * microcode.
2039  */
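/*
 * Purely illustrative (not driver code): a minimal userland sketch of
 * how such a request might be issued, assuming the ATAIOCCOMMAND ioctl
 * and struct atareq from <sys/ataio.h>; the "databuf" member name and
 * the WDCC_IDENTIFY opcode used here are assumptions for the example.
 *
 *	struct atareq req;
 *	char buf[512];
 *
 *	memset(&req, 0, sizeof(req));
 *	req.command = WDCC_IDENTIFY;	// IDENTIFY returns one 512-byte sector
 *	req.flags = ATACMD_READ;	// data moves from device to host
 *	req.databuf = buf;
 *	req.datalen = sizeof(buf);	// non-zero, so wdioctl() goes via physio()
 *	req.timeout = 1000;		// wdioctlstrategy() rejects a zero timeout
 *	if (ioctl(fd, ATAIOCCOMMAND, &req) == -1)
 *		err(EXIT_FAILURE, "ATAIOCCOMMAND");
 */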
2040 
2041 void
2042 wdioctlstrategy(struct buf *bp)
2043 {
2044 	struct wd_ioctl *wi;
2045 	struct ata_xfer *xfer;
2046 	int error = 0;
2047 
2048 	wi = wi_find(bp);
2049 	if (wi == NULL) {
2050 		printf("wdioctlstrategy: "
2051 		    "No matching ioctl request found in queue\n");
2052 		error = EINVAL;
2053 		goto out2;
2054 	}
2055 
2056 	xfer = ata_get_xfer(wi->wi_softc->drvp->chnl_softc, true);
2057 
2058 	/*
2059 	 * Abort if physio broke up the transfer
2060 	 */
2061 
2062 	if (bp->b_bcount != wi->wi_atareq.datalen) {
2063 		printf("physio split wd ioctl request... cannot proceed\n");
2064 		error = EIO;
2065 		goto out;
2066 	}
2067 
2068 	/*
2069 	 * Abort if we didn't get a buffer size that was a multiple of
2070 	 * our sector size (or overflows CHS/LBA28 sector count)
2071 	 */
2072 
2073 	if ((bp->b_bcount % wi_sector_size(wi)) != 0 ||
2074 	    (bp->b_bcount / wi_sector_size(wi)) >=
2075 	     (1 << NBBY)) {
2076 		error = EINVAL;
2077 		goto out;
2078 	}
2079 
2080 	/*
2081 	 * Make sure a timeout was supplied in the ioctl request
2082 	 */
2083 
2084 	if (wi->wi_atareq.timeout == 0) {
2085 		error = EINVAL;
2086 		goto out;
2087 	}
2088 
2089 	if (wi->wi_atareq.flags & ATACMD_READ)
2090 		xfer->c_ata_c.flags |= AT_READ;
2091 	else if (wi->wi_atareq.flags & ATACMD_WRITE)
2092 		xfer->c_ata_c.flags |= AT_WRITE;
2093 
2094 	if (wi->wi_atareq.flags & ATACMD_READREG)
2095 		xfer->c_ata_c.flags |= AT_READREG;
2096 
2097 	if ((wi->wi_atareq.flags & ATACMD_LBA) != 0)
2098 		xfer->c_ata_c.flags |= AT_LBA;
2099 
2100 	xfer->c_ata_c.flags |= AT_WAIT;
2101 
2102 	xfer->c_ata_c.timeout = wi->wi_atareq.timeout;
2103 	xfer->c_ata_c.r_command = wi->wi_atareq.command;
2104 	xfer->c_ata_c.r_lba = ((wi->wi_atareq.head & 0x0f) << 24) |
2105 	    (wi->wi_atareq.cylinder << 8) |
2106 	    wi->wi_atareq.sec_num;
2107 	xfer->c_ata_c.r_count = wi->wi_atareq.sec_count;
2108 	xfer->c_ata_c.r_features = wi->wi_atareq.features;
2109 	xfer->c_ata_c.r_st_bmask = WDCS_DRDY;
2110 	xfer->c_ata_c.r_st_pmask = WDCS_DRDY;
2111 	xfer->c_ata_c.data = wi->wi_bp.b_data;
2112 	xfer->c_ata_c.bcount = wi->wi_bp.b_bcount;
2113 
2114 	wi->wi_softc->atabus->ata_exec_command(wi->wi_softc->drvp, xfer);
2115 	ata_wait_cmd(wi->wi_softc->drvp->chnl_softc, xfer);
2116 
2117 	if (xfer->c_ata_c.flags & (AT_ERROR | AT_TIMEOU | AT_DF)) {
2118 		if (xfer->c_ata_c.flags & AT_ERROR) {
2119 			wi->wi_atareq.retsts = ATACMD_ERROR;
2120 			wi->wi_atareq.error = xfer->c_ata_c.r_error;
2121 		} else if (xfer->c_ata_c.flags & AT_DF)
2122 			wi->wi_atareq.retsts = ATACMD_DF;
2123 		else
2124 			wi->wi_atareq.retsts = ATACMD_TIMEOUT;
2125 	} else {
2126 		wi->wi_atareq.retsts = ATACMD_OK;
2127 		if (wi->wi_atareq.flags & ATACMD_READREG) {
2128 			wi->wi_atareq.command = xfer->c_ata_c.r_status;
2129 			wi->wi_atareq.features = xfer->c_ata_c.r_error;
2130 			wi->wi_atareq.sec_count = xfer->c_ata_c.r_count;
2131 			wi->wi_atareq.sec_num = xfer->c_ata_c.r_lba & 0xff;
2132 			wi->wi_atareq.head = (xfer->c_ata_c.r_device & 0xf0) |
2133 			    ((xfer->c_ata_c.r_lba >> 24) & 0x0f);
2134 			wi->wi_atareq.cylinder =
2135 			    (xfer->c_ata_c.r_lba >> 8) & 0xffff;
2136 			wi->wi_atareq.error = xfer->c_ata_c.r_error;
2137 		}
2138 	}
2139 
2140 out:
2141 	ata_free_xfer(wi->wi_softc->drvp->chnl_softc, xfer);
2142 out2:
2143 	bp->b_error = error;
2144 	if (error)
2145 		bp->b_resid = bp->b_bcount;
2146 	biodone(bp);
2147 }
2148 
2149 static void
2150 wd_sysctl_attach(struct wd_softc *wd)
2151 {
2152 	struct dk_softc *dksc = &wd->sc_dksc;
2153 	const struct sysctlnode *node;
2154 	int error;
2155 
2156 	/* sysctl set-up */
2157 	if (sysctl_createv(&wd->nodelog, 0, NULL, &node,
2158 				0, CTLTYPE_NODE, dksc->sc_xname,
2159 				SYSCTL_DESCR("wd driver settings"),
2160 				NULL, 0, NULL, 0,
2161 				CTL_HW, CTL_CREATE, CTL_EOL) != 0) {
2162 		aprint_error_dev(dksc->sc_dev,
2163 		    "could not create %s.%s sysctl node\n",
2164 		    "hw", dksc->sc_xname);
2165 		return;
2166 	}
2167 
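	/*
	 * The nodes below appear under hw.<xname>; e.g. (illustrative)
	 * "sysctl -w hw.wd0.use_ncq=0" would disable NCQ for wd0.
	 */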
2168 	wd->drv_ncq = true;
2169 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2170 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq",
2171 				SYSCTL_DESCR("use NCQ if supported"),
2172 				NULL, 0, &wd->drv_ncq, 0,
2173 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2174 				!= 0) {
2175 		aprint_error_dev(dksc->sc_dev,
2176 		    "could not create %s.%s.use_ncq sysctl - error %d\n",
2177 		    "hw", dksc->sc_xname, error);
2178 		return;
2179 	}
2180 
2181 	wd->drv_ncq_prio = false;
2182 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2183 				CTLFLAG_READWRITE, CTLTYPE_BOOL, "use_ncq_prio",
2184 				SYSCTL_DESCR("use NCQ PRIORITY if supported"),
2185 				NULL, 0, &wd->drv_ncq_prio, 0,
2186 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2187 				!= 0) {
2188 		aprint_error_dev(dksc->sc_dev,
2189 		    "could not create %s.%s.use_ncq_prio sysctl - error %d\n",
2190 		    "hw", dksc->sc_xname, error);
2191 		return;
2192 	}
2193 
2194 #ifdef WD_CHAOS_MONKEY
2195 	wd->drv_chaos_freq = 0;
2196 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2197 				CTLFLAG_READWRITE, CTLTYPE_INT, "chaos_freq",
2198 				SYSCTL_DESCR("simulated bio read error rate"),
2199 				NULL, 0, &wd->drv_chaos_freq, 0,
2200 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2201 				!= 0) {
2202 		aprint_error_dev(dksc->sc_dev,
2203 		    "could not create %s.%s.chaos_freq sysctl - error %d\n",
2204 		    "hw", dksc->sc_xname, error);
2205 		return;
2206 	}
2207 
2208 	wd->drv_chaos_cnt = 0;
2209 	if ((error = sysctl_createv(&wd->nodelog, 0, NULL, NULL,
2210 				CTLFLAG_READONLY, CTLTYPE_INT, "chaos_cnt",
2211 				SYSCTL_DESCR("number of processed bio reads"),
2212 				NULL, 0, &wd->drv_chaos_cnt, 0,
2213 				CTL_HW, node->sysctl_num, CTL_CREATE, CTL_EOL))
2214 				!= 0) {
2215 		aprint_error_dev(dksc->sc_dev,
2216 		    "could not create %s.%s.chaos_cnt sysctl - error %d\n",
2217 		    "hw", dksc->sc_xname, error);
2218 		return;
2219 	}
2220 #endif
2221 
2222 }
2223 
2224 static void
2225 wd_sysctl_detach(struct wd_softc *wd)
2226 {
2227 	sysctl_teardown(&wd->nodelog);
2228 }
2229 
2230 #ifdef ATADEBUG
2231 int wddebug(void);
2232 
2233 int
2234 wddebug(void)
2235 {
2236 	struct wd_softc *wd;
2237 	struct dk_softc *dksc;
2238 	int unit;
2239 
2240 	for (unit = 0; unit <= 3; unit++) {
2241 		wd = device_lookup_private(&wd_cd, unit);
2242 		if (wd == NULL)
2243 			continue;
2244 		dksc = &wd->sc_dksc;
2245 		printf("%s fl %x bufq %p:\n",
2246 		    dksc->sc_xname, wd->sc_flags, bufq_peek(dksc->sc_bufq));
2247 
2248 		atachannel_debug(wd->drvp->chnl_softc);
2249 	}
2250 	return 0;
2251 }
2252 #endif /* ATADEBUG */
2253