/* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
 * - Multithreading support by Arne Welzel
 * - Native Command Queuing support by Raja Appuswamy
 */
/*
 * This driver is based on the following specifications:
 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
 * - Serial ATA Revision 2.6
 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
 *
 * The driver supports device hot-plug, active device status tracking,
 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
 * sector-unaligned reads, native command queuing and parallel requests to
 * different devices.
 *
 * It does not implement transparent failure recovery, power management, or
 * port multiplier support.
 */
/*
 * An AHCI controller exposes a number of ports (up to 32), each of which may
 * or may not have one device attached (port multipliers are not supported).
 * Each port is maintained independently.
 *
 * The following figure depicts the possible transitions between port states.
 * The NO_PORT state is not included; no transitions can be made from or to it.
 *
 *   +----------+                      +----------+
 *   | SPIN_UP  | ------+        +---> | BAD_DEV  | ------------------+
 *   +----------+       |        |     +----------+                   |
 *        |             |        |          ^                         |
 *        v             v        |          |                         |
 *   +----------+     +----------+     +----------+     +----------+  |
 *   |  NO_DEV  | --> | WAIT_DEV | --> | WAIT_ID  | --> | GOOD_DEV |  |
 *   +----------+     +----------+     +----------+     +----------+  |
 *        ^                |               |               |          |
 *        +----------------+----------------+----------------+--------+
 *
 * At driver startup, all physically present ports are put in SPIN_UP state.
 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
 * until either the spin-up timer expires, or a device has been identified on
 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
 * startup time if the device has not yet been able to announce its presence.
 *
 * If a device is detected, either at startup time or after hot-plug, its
 * signature is checked and it is identified, after which it may be determined
 * to be a usable ("good") device, which means that the device is considered to
 * be in a working state. If these steps fail, the device is marked as unusable
 * ("bad"). At any point in time, the device may be disconnected; the port is
 * then put back into NO_DEV state.
 *
 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
 * devices are assumed to be fixed; all ATAPI devices are assumed to have
 * removable media. To prevent erroneous access to switched devices and media,
 * the driver makes devices inaccessible until they are fully closed (the open
 * count is zero) when a device (hot-plug) or medium change is detected.
 * For hot-plug changes, access is prevented by setting the BARRIER flag until
 * the device is fully closed and then reopened. For medium changes, access is
 * prevented by not acknowledging the medium change until the device is fully
 * closed and reopened. Removable media are not locked in the drive while
 * opened, because the driver author is uncomfortable with that concept.
 *
 * Ports may leave the group of states where a device is connected (that is,
 * WAIT_ID, GOOD_DEV, and BAD_DEV) in two ways: either due to a hot-unplug
 * event, or due to a hard reset after a serious failure. For simplicity,
 * we perform a hard reset after a hot-unplug event as well, so that the link
 * to the device is broken. Thus, in both cases, a transition to NO_DEV is
 * made, after which the link to the device may or may not be reestablished.
 * In both cases, ongoing requests are cancelled and the BARRIER flag is set.
 *
 * The following table lists for each state, whether the port is started
 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
 * set to, and what BDEV_OPEN calls on this port should return.
 *
 *   State       Started   Timer        PxIE       BDEV_OPEN
 *   ---------   -------   -----------  ---------  ---------
 *   NO_PORT     no        no           (none)     ENXIO
 *   SPIN_UP     no        yes          PCE        (wait)
 *   NO_DEV      no        no           PCE        ENXIO
 *   WAIT_DEV    no        yes          PCE        (wait)
 *   BAD_DEV     no        no           PRCE       ENXIO
 *   WAIT_ID     yes       yes          PRCE+      (wait)
 *   GOOD_DEV    yes       per-command  PRCE+      OK
 *
 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
 * when changing from SPIN_UP to any state but WAIT_DEV, and when changing from
 * WAIT_DEV to any state but WAIT_ID, and when changing from WAIT_ID to any
 * other state.
 */
/*
 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
 * to 4MB. This limit has been chosen for a number of reasons:
 * - The size that can be specified in a Physical Region Descriptor (PRD) is
 *   limited to 4MB for AHCI. Limiting the total transfer size to at most this
 *   size implies that no I/O vector element needs to be split up across PRDs.
 *   This means that the maximum number of needed PRDs can be predetermined.
 * - The limit is below what can be transferred in a single ATA request, namely
 *   64k sectors (i.e., at least 32MB). This means that transfer requests need
 *   never be split up into smaller chunks, reducing implementation complexity.
 * - A single, static timeout can be used for transfers. Very large transfers
 *   can legitimately take up to several minutes -- well beyond the appropriate
 *   timeout range for small transfers. The limit obviates the need for a
 *   timeout scheme that takes into account the transfer size.
 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
 *   clients to keep the driver busy for a long time with a single request.
 * - The limit is high enough for all practical purposes. The transfer setup
 *   overhead is already relatively negligible at this size, and even larger
 *   requests will not help maximize throughput. As NR_IOREQS is currently set
 *   to 64, the limit still allows file systems to perform I/O requests with
 *   vectors completely filled with 64KB blocks. See the example below.
 */
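/*
 * As an illustration of that last point, using the values given above: with
 * NR_IOREQS set to 64, a single I/O vector completely filled with 64KB blocks
 * describes 64 * 64KB = 4MB of data, which is exactly MAX_TRANSFER.
 */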
#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <machine/pci.h>
#include <sys/ioc_disk.h>
#include <sys/mman.h>
#include <assert.h>

#include "ahci.h"

/* Host Bus Adapter (HBA) state. */
static struct {
	volatile u32_t *base;	/* base address of memory-mapped registers */
	size_t size;		/* size of memory-mapped register area */

	int nr_ports;		/* addressable number of ports (1..NR_PORTS) */
	int nr_cmds;		/* maximum number of commands per port */
	int has_ncq;		/* NCQ support flag */
	int has_clo;		/* CLO support flag */

	int irq;		/* IRQ number */
	int hook_id;		/* IRQ hook ID */
} hba_state;

#define hba_read(r)		(hba_state.base[r])
#define hba_write(r, v)		(hba_state.base[r] = (v))

/* Port state. */
static struct port_state {
	int state;		/* port state */
	unsigned int flags;	/* port flags */

	volatile u32_t *reg;	/* memory-mapped port registers */

	u8_t *mem_base;		/* primary memory buffer virtual address */
	phys_bytes mem_phys;	/* primary memory buffer physical address */
	vir_bytes mem_size;	/* primary memory buffer size */

	/* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
	u32_t *fis_base;	/* FIS receive buffer virtual address */
	phys_bytes fis_phys;	/* FIS receive buffer physical address */
	u32_t *cl_base;		/* command list buffer virtual address */
	phys_bytes cl_phys;	/* command list buffer physical address */
	u8_t *ct_base[NR_CMDS];	/* command table virtual address */
	phys_bytes ct_phys[NR_CMDS];	/* command table physical address */
	u8_t *tmp_base;		/* temporary storage buffer virtual address */
	phys_bytes tmp_phys;	/* temporary storage buffer physical address */

	u8_t *pad_base;		/* sector padding buffer virtual address */
	phys_bytes pad_phys;	/* sector padding buffer physical address */
	vir_bytes pad_size;	/* sector padding buffer size */

	u64_t lba_count;	/* number of valid Logical Block Addresses */
	u32_t sector_size;	/* medium sector size in bytes */

	int open_count;		/* number of times this port is opened */

	int device;		/* associated device number, or NO_DEVICE */
	struct device part[DEV_PER_DRIVE];	/* partition bases and sizes */
	struct device subpart[SUB_PER_DRIVE];	/* same for subpartitions */

	minix_timer_t timer;	/* port-specific timeout timer */
	int left;		/* number of tries left before giving up */
				/* (only used for signature probing) */

	int queue_depth;	/* NCQ queue depth */
	u32_t pend_mask;	/* commands not yet complete */
	struct {
		thread_id_t tid;	/* ID of the worker thread */
		minix_timer_t timer;	/* timer associated with each request */
		int result;	/* success/failure result of the commands */
	} cmd_info[NR_CMDS];
} port_state[NR_PORTS];

#define port_read(ps, r)	((ps)->reg[r])
#define port_write(ps, r, v)	((ps)->reg[r] = (v))

static int ahci_instance;		/* driver instance number */

static int ahci_verbose;		/* verbosity level (0..4) */

/* Timeout-related values. */
static clock_t ahci_spinup_timeout;
static clock_t ahci_device_timeout;
static clock_t ahci_device_delay;
static unsigned int ahci_device_checks;
static clock_t ahci_command_timeout;
static clock_t ahci_transfer_timeout;
static clock_t ahci_flush_timeout;

/* Timeout environment variable names and default values. */
static struct {
	char *name;			/* environment variable name */
	u32_t default_ms;		/* default in milliseconds */
	clock_t *ptr;			/* clock ticks value pointer */
} ahci_timevar[] = {
	{ "ahci_init_timeout",		SPINUP_TIMEOUT,
					&ahci_spinup_timeout	},
	{ "ahci_device_timeout",	DEVICE_TIMEOUT,
					&ahci_device_timeout	},
	{ "ahci_cmd_timeout",		COMMAND_TIMEOUT,
					&ahci_command_timeout	},
	{ "ahci_io_timeout",		TRANSFER_TIMEOUT,
					&ahci_transfer_timeout	},
	{ "ahci_flush_timeout",		FLUSH_TIMEOUT,
					&ahci_flush_timeout	}
};

static int ahci_map[MAX_DRIVES];	/* device-to-port mapping */

static int ahci_exiting = FALSE;	/* exit after last close? */

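/* A timer argument encodes a port number in its upper bits and a command tag
 * in its lower eight bits, so that a single expiration handler can recover
 * both; see the set_timer() calls that use BUILD_ARG below.
 */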
#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
#define GET_PORT(arg)		((arg) >> 8)
#define GET_TAG(arg)		((arg) & 0xFF)

#define dprintf(v,s) do {		\
	if (ahci_verbose >= (v))	\
		printf s;		\
} while (0)

/* Convert milliseconds to clock ticks. Round up. */
#define millis_to_hz(ms)	(((ms) * sys_hz() + 999) / 1000)
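/* For example, at a system clock frequency of 100Hz, millis_to_hz(2500)
 * yields (2500 * 100 + 999) / 1000 = 250 ticks, and millis_to_hz(5) yields
 * (5 * 100 + 999) / 1000 = 1 tick rather than 0.
 */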

static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
static void port_timeout(int arg);
static void port_disconnect(struct port_state *ps);

static char *ahci_portname(struct port_state *ps);
static int ahci_open(devminor_t minor, int access);
static int ahci_close(devminor_t minor);
static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags);
static struct device *ahci_part(devminor_t minor);
static void ahci_alarm(clock_t stamp);
static int ahci_ioctl(devminor_t minor, unsigned long request,
	endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
static void ahci_intr(unsigned int mask);
static int ahci_device(devminor_t minor, device_id_t *id);
static struct port_state *ahci_get_port(devminor_t minor);

/* AHCI driver table. */
static struct blockdriver ahci_dtab = {
	.bdr_type	= BLOCKDRIVER_TYPE_DISK,
	.bdr_open	= ahci_open,
	.bdr_close	= ahci_close,
	.bdr_transfer	= ahci_transfer,
	.bdr_ioctl	= ahci_ioctl,
	.bdr_part	= ahci_part,
	.bdr_intr	= ahci_intr,
	.bdr_alarm	= ahci_alarm,
	.bdr_device	= ahci_device
};

/*===========================================================================*
 *				atapi_exec				     *
 *===========================================================================*/
static int atapi_exec(struct port_state *ps, int cmd,
	u8_t packet[ATAPI_PACKET_SIZE], size_t size, int write)
{
	/* Execute an ATAPI command. Return OK or error.
	 */
	cmd_fis_t fis;
	prd_t prd[1];
	int nr_prds = 0;

	assert(size <= AHCI_TMP_SIZE);

	/* Fill in the command table with a FIS, a packet, and if a data
	 * transfer is requested, also a PRD.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;

	if (size > 0) {
		fis.cf_feat = ATA_FEAT_PACKET_DMA;
		if (!write && (ps->flags & FLAG_USE_DMADIR))
			fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

		prd[0].vp_addr = ps->tmp_phys;
		prd[0].vp_size = size;
		nr_prds++;
	}

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prd, nr_prds, write);

	return port_exec(ps, cmd, ahci_command_timeout);
}

/*===========================================================================*
 *				atapi_test_unit				     *
 *===========================================================================*/
static int atapi_test_unit(struct port_state *ps, int cmd)
{
	/* Test whether the ATAPI device and medium are ready.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_TEST_UNIT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}

/*===========================================================================*
 *				atapi_request_sense			     *
 *===========================================================================*/
static int atapi_request_sense(struct port_state *ps, int cmd, int *sense)
{
	/* Request error (sense) information from an ATAPI device, and return
	 * the sense key. The additional sense codes are not used at this time.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_REQUEST_SENSE;
	packet[4] = ATAPI_REQUEST_SENSE_LEN;

	r = atapi_exec(ps, cmd, packet, ATAPI_REQUEST_SENSE_LEN, FALSE);

	if (r != OK)
		return r;

	dprintf(V_REQ, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
		ahci_portname(ps), ps->tmp_base[2] & 0xF, ps->tmp_base[12],
		ps->tmp_base[13]));

	*sense = ps->tmp_base[2] & 0xF;

	return OK;
}

/*===========================================================================*
 *				atapi_load_eject			     *
 *===========================================================================*/
static int atapi_load_eject(struct port_state *ps, int cmd, int load)
{
	/* Load or eject a medium in an ATAPI device.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_START_STOP;
	packet[4] = load ? ATAPI_START_STOP_LOAD : ATAPI_START_STOP_EJECT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}

/*===========================================================================*
 *				atapi_read_capacity			     *
 *===========================================================================*/
static int atapi_read_capacity(struct port_state *ps, int cmd)
{
	/* Retrieve the LBA count and sector size of an ATAPI medium.
	 */
	u8_t packet[ATAPI_PACKET_SIZE], *buf;
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_READ_CAPACITY;

	r = atapi_exec(ps, cmd, packet, ATAPI_READ_CAPACITY_LEN, FALSE);
	if (r != OK)
		return r;

	/* Store the number of LBA blocks and sector size. The command returns
	 * the (big-endian) address of the last block, hence the increment by
	 * one below.
	 */
	buf = ps->tmp_base;
	ps->lba_count = (u64_t) ((buf[0] << 24) | (buf[1] << 16) |
		(buf[2] << 8) | buf[3]) + 1;
	ps->sector_size =
		(buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];

	if (ps->sector_size == 0 || (ps->sector_size & 1)) {
		dprintf(V_ERR, ("%s: invalid medium sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return EINVAL;
	}

	dprintf(V_INFO,
		("%s: medium detected (%u byte sectors, %llu MB size)\n",
		ahci_portname(ps), ps->sector_size,
		ps->lba_count * ps->sector_size / (1024*1024)));

	return OK;
}

/*===========================================================================*
 *				atapi_check_medium			     *
 *===========================================================================*/
static int atapi_check_medium(struct port_state *ps, int cmd)
{
	/* Check whether a medium is present in a removable-media ATAPI device.
	 * If a new medium is detected, get its total and sector size. Return
	 * OK only if a usable medium is present, and an error otherwise.
	 */
	int sense;

	/* Perform a readiness check. */
	if (atapi_test_unit(ps, cmd) != OK) {
		ps->flags &= ~FLAG_HAS_MEDIUM;

		/* If the check failed due to a unit attention condition, retry
		 * reading the medium capacity. Otherwise, assume that there is
		 * no medium available.
		 */
		if (atapi_request_sense(ps, cmd, &sense) != OK ||
				sense != ATAPI_SENSE_UNIT_ATT)
			return ENXIO;
	}

	/* If a medium is newly detected, try reading its capacity now. */
	if (!(ps->flags & FLAG_HAS_MEDIUM)) {
		if (atapi_read_capacity(ps, cmd) != OK)
			return EIO;

		ps->flags |= FLAG_HAS_MEDIUM;
	}

	return OK;
}

/*===========================================================================*
 *				atapi_id_check				     *
 *===========================================================================*/
static int atapi_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATAPI device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* The device must be an ATAPI device; it must have removable media;
	 * it must support DMA without DMADIR, or DMADIR for DMA.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATAPI_MASK |
		ATA_ID_GCAP_REMOVABLE | ATA_ID_GCAP_INCOMPLETE)) !=
		(ATA_ID_GCAP_ATAPI | ATA_ID_GCAP_REMOVABLE) ||
		((buf[ATA_ID_CAP] & ATA_ID_CAP_DMA) != ATA_ID_CAP_DMA &&
		(buf[ATA_ID_DMADIR] & (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA)) != (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA))) {

		dprintf(V_ERR, ("%s: unsupported ATAPI device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_DMADIR]));

		return FALSE;
	}

	/* Remember whether to use the DMADIR flag when appropriate. */
	if (buf[ATA_ID_DMADIR] & ATA_ID_DMADIR_DMADIR)
		ps->flags |= FLAG_USE_DMADIR;

	/* ATAPI CD-ROM devices are considered read-only. */
	if (((buf[ATA_ID_GCAP] & ATA_ID_GCAP_TYPE_MASK) >>
		ATA_ID_GCAP_TYPE_SHIFT) == ATAPI_TYPE_CDROM)
		ps->flags |= FLAG_READONLY;

	if ((buf[ATA_ID_SUP1] & ATA_ID_SUP1_VALID_MASK) == ATA_ID_SUP1_VALID &&
		!(ps->flags & FLAG_READONLY)) {
		/* Save write cache related capabilities of the device. It is
		 * possible, although unlikely, that a device has support for
		 * either of these but not both.
		 */
		if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
			ps->flags |= FLAG_HAS_WCACHE;

		if (buf[ATA_ID_SUP1] & ATA_ID_SUP1_FLUSH)
			ps->flags |= FLAG_HAS_FLUSH;
	}

	return TRUE;
}

/*===========================================================================*
 *				atapi_transfer				     *
 *===========================================================================*/
static int atapi_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATAPI device.
	 */
	cmd_fis_t fis;
	u8_t packet[ATAPI_PACKET_SIZE];

	/* Fill in a Register Host to Device FIS. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;
	fis.cf_feat = ATA_FEAT_PACKET_DMA;
	if (!write && (ps->flags & FLAG_USE_DMADIR))
		fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

	/* Fill in a packet. */
	memset(packet, 0, sizeof(packet));
	packet[0] = write ? ATAPI_CMD_WRITE : ATAPI_CMD_READ;
	packet[2] = (start_lba >> 24) & 0xFF;
	packet[3] = (start_lba >> 16) & 0xFF;
	packet[4] = (start_lba >> 8) & 0xFF;
	packet[5] = start_lba & 0xFF;
	packet[6] = (count >> 24) & 0xFF;
	packet[7] = (count >> 16) & 0xFF;
	packet[8] = (count >> 8) & 0xFF;
	packet[9] = count & 0xFF;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}

/*===========================================================================*
 *				ata_id_check				     *
 *===========================================================================*/
static int ata_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATA device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* This must be an ATA device; it must not have removable media;
	 * it must support LBA and DMA; it must support the FLUSH CACHE
	 * command; it must support 48-bit addressing.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATA_MASK | ATA_ID_GCAP_REMOVABLE |
		ATA_ID_GCAP_INCOMPLETE)) != ATA_ID_GCAP_ATA ||
		(buf[ATA_ID_CAP] & (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA)) !=
		(ATA_ID_CAP_LBA | ATA_ID_CAP_DMA) ||
		(buf[ATA_ID_SUP1] & (ATA_ID_SUP1_VALID_MASK |
		ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) !=
		(ATA_ID_SUP1_VALID | ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) {

		dprintf(V_ERR, ("%s: unsupported ATA device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_SUP1]));

		return FALSE;
	}

	/* Get number of LBA blocks, and sector size. */
	ps->lba_count = ((u64_t) buf[ATA_ID_LBA3] << 48) |
			((u64_t) buf[ATA_ID_LBA2] << 32) |
			((u64_t) buf[ATA_ID_LBA1] << 16) |
			 (u64_t) buf[ATA_ID_LBA0];

	/* Determine the queue depth of the device. */
	if (hba_state.has_ncq &&
			(buf[ATA_ID_SATA_CAP] & ATA_ID_SATA_CAP_NCQ)) {
		ps->flags |= FLAG_HAS_NCQ;
		ps->queue_depth =
			(buf[ATA_ID_QDEPTH] & ATA_ID_QDEPTH_MASK) + 1;
		if (ps->queue_depth > hba_state.nr_cmds)
			ps->queue_depth = hba_state.nr_cmds;
	}

	/* For now, we only support long logical sectors. Long physical sector
	 * support may be added later. Note that the given value is in words.
	 */
	if ((buf[ATA_ID_PLSS] & (ATA_ID_PLSS_VALID_MASK | ATA_ID_PLSS_LLS)) ==
		(ATA_ID_PLSS_VALID | ATA_ID_PLSS_LLS))
		ps->sector_size =
			((buf[ATA_ID_LSS1] << 16) | buf[ATA_ID_LSS0]) << 1;
	else
		ps->sector_size = ATA_SECTOR_SIZE;

	if (ps->sector_size < ATA_SECTOR_SIZE) {
		dprintf(V_ERR, ("%s: invalid sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return FALSE;
	}

	ps->flags |= FLAG_HAS_MEDIUM | FLAG_HAS_FLUSH;

	/* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
	if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
		ps->flags |= FLAG_HAS_WCACHE;

	/* Check Force Unit Access capability of the device. */
	if ((buf[ATA_ID_ENA2] & (ATA_ID_ENA2_VALID_MASK | ATA_ID_ENA2_FUA)) ==
		(ATA_ID_ENA2_VALID | ATA_ID_ENA2_FUA))
		ps->flags |= FLAG_HAS_FUA;

	return TRUE;
}

/*===========================================================================*
 *				ata_transfer				     *
 *===========================================================================*/
static int ata_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, int force, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATA device.
	 */
	cmd_fis_t fis;

	assert(count <= ATA_MAX_SECTORS);

	/* Special case for sector counts: 65536 is specified as 0. */
	if (count == ATA_MAX_SECTORS)
		count = 0;

	memset(&fis, 0, sizeof(fis));
	fis.cf_dev = ATA_DEV_LBA;
	if (ps->flags & FLAG_HAS_NCQ) {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_dev |= ATA_DEV_FUA;

			fis.cf_cmd = ATA_CMD_WRITE_FPDMA_QUEUED;
		} else {
			fis.cf_cmd = ATA_CMD_READ_FPDMA_QUEUED;
		}
	}
	else {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_cmd = ATA_CMD_WRITE_DMA_FUA_EXT;
			else
				fis.cf_cmd = ATA_CMD_WRITE_DMA_EXT;
		}
		else {
			fis.cf_cmd = ATA_CMD_READ_DMA_EXT;
		}
	}
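	/* Split the 48-bit LBA into the base (bits 0-23) and extended (bits
	 * 24-47) FIS fields; the 16-bit sector count is likewise split into
	 * two bytes.
	 */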
	fis.cf_lba = start_lba & 0x00FFFFFFUL;
	fis.cf_lba_exp = (start_lba >> 24) & 0x00FFFFFFUL;
	fis.cf_sec = count & 0xFF;
	fis.cf_sec_exp = (count >> 8) & 0xFF;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, NULL /*packet*/, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}

/*===========================================================================*
 *				gen_identify				     *
 *===========================================================================*/
static int gen_identify(struct port_state *ps, int blocking)
{
	/* Identify an ATA or ATAPI device. If the blocking flag is set, block
	 * until the command has completed; otherwise return immediately.
	 */
	cmd_fis_t fis;
	prd_t prd;

	/* Set up a command, and a single PRD for the result. */
	memset(&fis, 0, sizeof(fis));

	if (ps->flags & FLAG_ATAPI)
		fis.cf_cmd = ATA_CMD_IDENTIFY_PACKET;
	else
		fis.cf_cmd = ATA_CMD_IDENTIFY;

	prd.vp_addr = ps->tmp_phys;
	prd.vp_size = ATA_ID_SIZE;

	/* Start the command, and possibly wait for the result. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, &prd, 1, FALSE /*write*/);

	if (blocking)
		return port_exec(ps, 0, ahci_command_timeout);

	port_issue(ps, 0, ahci_command_timeout);

	return OK;
}

/*===========================================================================*
 *				gen_flush_wcache			     *
 *===========================================================================*/
static int gen_flush_wcache(struct port_state *ps)
{
	/* Flush the device's write cache.
	 */
	cmd_fis_t fis;

	/* The FLUSH CACHE command may not be supported by all (writable ATAPI)
	 * devices.
	 */
	if (!(ps->flags & FLAG_HAS_FLUSH))
		return EINVAL;

	/* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
	 * interested in the disk location of a failure, so there is no reason
	 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
	 * may indeed fail due to a disk error, in which case it should be
	 * repeated. For now, we shift this responsibility onto the caller.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_FLUSH_CACHE;

	/* Start the command, and wait for it to complete or fail.
	 * The flush command may take longer than regular I/O commands.
	 */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, ahci_flush_timeout);
}

/*===========================================================================*
 *				gen_get_wcache				     *
 *===========================================================================*/
static int gen_get_wcache(struct port_state *ps, int *val)
{
	/* Retrieve the status of the device's write cache.
	 */
	int r;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Retrieve information about the device. */
	if ((r = gen_identify(ps, TRUE /*blocking*/)) != OK)
		return r;

	/* Return the current setting. */
	*val = !!(((u16_t *) ps->tmp_base)[ATA_ID_ENA0] & ATA_ID_ENA0_WCACHE);

	return OK;
}

/*===========================================================================*
 *				gen_set_wcache				     *
 *===========================================================================*/
static int gen_set_wcache(struct port_state *ps, int enable)
{
	/* Enable or disable the device's write cache.
	 */
	cmd_fis_t fis;
	clock_t timeout;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Disabling the write cache causes a (blocking) cache flush. Cache
	 * flushes may take much longer than regular commands.
	 */
	timeout = enable ? ahci_command_timeout : ahci_flush_timeout;

	/* Set up a command. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_SET_FEATURES;
	fis.cf_feat = enable ? ATA_SF_EN_WCACHE : ATA_SF_DI_WCACHE;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, timeout);
}

/*===========================================================================*
 *				ct_set_fis				     *
 *===========================================================================*/
static vir_bytes ct_set_fis(u8_t *ct, cmd_fis_t *fis, unsigned int tag)
{
	/* Fill in the Frame Information Structure part of a command table,
	 * and return the resulting FIS size (in bytes). We only support the
	 * command Register - Host to Device FIS type.
	 */

	memset(ct, 0, ATA_H2D_SIZE);
	ct[ATA_FIS_TYPE] = ATA_FIS_TYPE_H2D;
	ct[ATA_H2D_FLAGS] = ATA_H2D_FLAGS_C;
	ct[ATA_H2D_CMD] = fis->cf_cmd;
	ct[ATA_H2D_LBA_LOW] = fis->cf_lba & 0xFF;
	ct[ATA_H2D_LBA_MID] = (fis->cf_lba >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH] = (fis->cf_lba >> 16) & 0xFF;
	ct[ATA_H2D_DEV] = fis->cf_dev;
	ct[ATA_H2D_LBA_LOW_EXP] = fis->cf_lba_exp & 0xFF;
	ct[ATA_H2D_LBA_MID_EXP] = (fis->cf_lba_exp >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH_EXP] = (fis->cf_lba_exp >> 16) & 0xFF;
	ct[ATA_H2D_CTL] = fis->cf_ctl;

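	/* For NCQ (FPDMA) commands, the sector count is carried in the
	 * FEATURES fields, and the upper bits of the SECTOR COUNT field hold
	 * the command tag instead.
	 */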
	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
		ct[ATA_H2D_FEAT] = fis->cf_sec;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_sec_exp;
		ct[ATA_H2D_SEC] = tag << ATA_SEC_TAG_SHIFT;
		ct[ATA_H2D_SEC_EXP] = 0;
	} else {
		ct[ATA_H2D_FEAT] = fis->cf_feat;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_feat_exp;
		ct[ATA_H2D_SEC] = fis->cf_sec;
		ct[ATA_H2D_SEC_EXP] = fis->cf_sec_exp;
	}

	return ATA_H2D_SIZE;
}

/*===========================================================================*
 *				ct_set_packet				     *
 *===========================================================================*/
static void ct_set_packet(u8_t *ct, u8_t packet[ATAPI_PACKET_SIZE])
{
	/* Fill in the packet part of a command table.
	 */

	memcpy(&ct[AHCI_CT_PACKET_OFF], packet, ATAPI_PACKET_SIZE);
}

/*===========================================================================*
 *				ct_set_prdt				     *
 *===========================================================================*/
static void ct_set_prdt(u8_t *ct, prd_t *prdt, int nr_prds)
{
	/* Fill in the PRDT part of a command table.
	 */
	u32_t *p;
	int i;

	p = (u32_t *) &ct[AHCI_CT_PRDT_OFF];

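	/* Each PRD entry consists of four dwords: the lower and upper halves
	 * of the data base address, a reserved dword, and the byte count
	 * minus one.
	 */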
	for (i = 0; i < nr_prds; i++, prdt++) {
		*p++ = prdt->vp_addr;
		*p++ = 0;
		*p++ = 0;
		*p++ = prdt->vp_size - 1;
	}
}

/*===========================================================================*
 *				port_set_cmd				     *
 *===========================================================================*/
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write)
{
	/* Prepare the given command for execution, by constructing a command
	 * table and setting up a command list entry pointing to the table.
	 */
	u8_t *ct;
	u32_t *cl;
	vir_bytes size;

	/* Set a port-specific flag that tells us if the command being
	 * processed is a NCQ command or not.
	 */
	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
		ps->flags |= FLAG_NCQ_MODE;
	} else {
		assert(!ps->pend_mask);
		ps->flags &= ~FLAG_NCQ_MODE;
	}

	/* Construct a command table, consisting of a command FIS, optionally
	 * a packet, and optionally a number of PRDs (making up the actual PRD
	 * table).
	 */
	ct = ps->ct_base[cmd];

	assert(ct != NULL);
	assert(nr_prds <= NR_PRDS);

	size = ct_set_fis(ct, fis, cmd);

	if (packet != NULL)
		ct_set_packet(ct, packet);

	ct_set_prdt(ct, prdt, nr_prds);

	/* Construct a command list entry, pointing to the command's table.
	 * Current assumptions: callers always provide a Register - Host to
	 * Device type FIS, and all non-NCQ commands are prefetchable.
	 */
	cl = &ps->cl_base[cmd * AHCI_CL_ENTRY_DWORDS];

	memset(cl, 0, AHCI_CL_ENTRY_SIZE);
	cl[0] = (nr_prds << AHCI_CL_PRDTL_SHIFT) |
		((!ATA_IS_FPDMA_CMD(fis->cf_cmd) &&
		(nr_prds > 0 || packet != NULL)) ? AHCI_CL_PREFETCHABLE : 0) |
		(write ? AHCI_CL_WRITE : 0) |
		((packet != NULL) ? AHCI_CL_ATAPI : 0) |
		((size / sizeof(u32_t)) << AHCI_CL_CFL_SHIFT);
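
	/* Physical address of the command table. Only the low dword is set
	 * here; the high dword (cl[3]) stays zero from the memset above, on
	 * the assumption that physical buffer addresses fit in 32 bits.
	 */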
	cl[2] = ps->ct_phys[cmd];
}

/*===========================================================================*
 *				port_finish_cmd				     *
 *===========================================================================*/
static void port_finish_cmd(struct port_state *ps, int cmd, int result)
{
	/* Finish a command that has either succeeded or failed.
	 */

	assert(cmd < ps->queue_depth);

	dprintf(V_REQ, ("%s: command %d %s\n", ahci_portname(ps),
		cmd, (result == RESULT_SUCCESS) ? "succeeded" : "failed"));

	/* Update the command result, and clear it from the pending list. */
	ps->cmd_info[cmd].result = result;

	assert(ps->pend_mask & (1 << cmd));
	ps->pend_mask &= ~(1 << cmd);

	/* Wake up the thread, unless it is the main thread. This can happen
	 * during initialization, as the gen_identify function is called by the
	 * main thread itself.
	 */
	if (ps->state != STATE_WAIT_ID)
		blockdriver_mt_wakeup(ps->cmd_info[cmd].tid);
}

/*===========================================================================*
 *				port_fail_cmds				     *
 *===========================================================================*/
static void port_fail_cmds(struct port_state *ps)
{
	/* Fail all ongoing commands for a device.
	 */
	int i;

	for (i = 0; ps->pend_mask != 0 && i < ps->queue_depth; i++)
		if (ps->pend_mask & (1 << i))
			port_finish_cmd(ps, i, RESULT_FAILURE);
}

/*===========================================================================*
 *				port_check_cmds				     *
 *===========================================================================*/
static void port_check_cmds(struct port_state *ps)
{
	/* Check what commands have completed, and finish them.
	 */
	u32_t mask, done;
	int i;

	/* See which commands have completed. */
	if (ps->flags & FLAG_NCQ_MODE)
		mask = port_read(ps, AHCI_PORT_SACT);
	else
		mask = port_read(ps, AHCI_PORT_CI);

	/* Wake up threads corresponding to completed commands. */
	done = ps->pend_mask & ~mask;

	for (i = 0; i < ps->queue_depth; i++)
		if (done & (1 << i))
			port_finish_cmd(ps, i, RESULT_SUCCESS);
}

/*===========================================================================*
 *				port_find_cmd				     *
 *===========================================================================*/
static int port_find_cmd(struct port_state *ps)
{
	/* Find a free command tag to queue the current request.
	 */
	int i;

	for (i = 0; i < ps->queue_depth; i++)
		if (!(ps->pend_mask & (1 << i)))
			break;

	/* We should always be able to find a free slot, since a thread runs
	 * only when it is free, and thus, only because a slot is available.
	 */
	assert(i < ps->queue_depth);

	return i;
}

/*===========================================================================*
 *				port_get_padbuf				     *
 *===========================================================================*/
static int port_get_padbuf(struct port_state *ps, size_t size)
{
	/* Make available a temporary buffer for use by this port. Enlarge the
	 * previous buffer if applicable and necessary, potentially changing
	 * its physical address.
	 */

	if (ps->pad_base != NULL && ps->pad_size >= size)
		return OK;

	if (ps->pad_base != NULL)
		free_contig(ps->pad_base, ps->pad_size);

	ps->pad_size = size;
	ps->pad_base = alloc_contig(ps->pad_size, 0, &ps->pad_phys);

	if (ps->pad_base == NULL) {
		dprintf(V_ERR, ("%s: unable to allocate a padding buffer of "
			"size %lu\n", ahci_portname(ps),
			(unsigned long) size));

		return ENOMEM;
	}

	dprintf(V_INFO, ("%s: allocated padding buffer of size %lu\n",
		ahci_portname(ps), (unsigned long) size));

	return OK;
}

/*===========================================================================*
 *				sum_iovec				     *
 *===========================================================================*/
static int sum_iovec(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes *total)
{
	/* Retrieve the total size of the given I/O vector. Check for alignment
	 * requirements along the way. Return OK (and the total request size)
	 * or an error.
	 */
	vir_bytes size, bytes;
	int i;

	bytes = 0;

	for (i = 0; i < nr_req; i++) {
		size = iovec[i].iov_size;

		if (size == 0 || (size & 1) || size > LONG_MAX) {
			dprintf(V_ERR, ("%s: bad size %lu in iovec from %d\n",
				ahci_portname(ps), size, endpt));
			return EINVAL;
		}

		bytes += size;

		if (bytes > LONG_MAX) {
			dprintf(V_ERR, ("%s: iovec size overflow from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}
	}

	*total = bytes;
	return OK;
}

/*===========================================================================*
 *				setup_prdt				     *
 *===========================================================================*/
static int setup_prdt(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes size, vir_bytes lead,
	int write, prd_t *prdt)
{
	/* Convert (the first part of) an I/O vector to an array of Physical
	 * Region Descriptors (PRDs) that can later be used to set the
	 * command's real PRDT. The resulting table as a whole should be
	 * sector-aligned; leading and trailing local buffers may have to be
	 * used for padding as appropriate. Return the number of PRD entries,
	 * or a negative error code.
	 */
	struct vumap_vir vvec[NR_PRDS];
	size_t bytes, trail;
	int i, r, pcount, nr_prds = 0;

	if (lead > 0) {
		/* Allocate a buffer for the data we don't want. */
		if ((r = port_get_padbuf(ps, ps->sector_size)) != OK)
			return r;

		prdt[nr_prds].vp_addr = ps->pad_phys;
		prdt[nr_prds].vp_size = lead;
		nr_prds++;
	}

	/* The sum of lead, size, trail has to be sector-aligned. */
	trail = (ps->sector_size - (lead + size)) % ps->sector_size;
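	/* For example, with 512-byte sectors, a lead of 100 bytes and a size
	 * of 1000 bytes yield a trail of 436 bytes, padding the transfer to
	 * three full sectors (1536 bytes).
	 */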

	/* Get the physical addresses of the given buffers. */
	for (i = 0; i < nr_req && size > 0; i++) {
		bytes = MIN(iovec[i].iov_size, size);

		if (endpt == SELF)
			vvec[i].vv_addr = (vir_bytes) iovec[i].iov_grant;
		else
			vvec[i].vv_grant = iovec[i].iov_grant;

		vvec[i].vv_size = bytes;

		size -= bytes;
	}

	pcount = i;

	if ((r = sys_vumap(endpt, vvec, i, 0, write ? VUA_READ : VUA_WRITE,
			&prdt[nr_prds], &pcount)) != OK) {
		dprintf(V_ERR, ("%s: unable to map memory from %d (%d)\n",
			ahci_portname(ps), endpt, r));
		return r;
	}

	assert(pcount > 0 && pcount <= i);

	/* Make sure all buffers are physically contiguous and word-aligned. */
	for (i = 0; i < pcount; i++) {
		if (vvec[i].vv_size != prdt[nr_prds].vp_size) {
			dprintf(V_ERR, ("%s: non-contiguous memory from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		if (prdt[nr_prds].vp_addr & 1) {
			dprintf(V_ERR, ("%s: bad physical address from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		nr_prds++;
	}

	if (trail > 0) {
		assert(nr_prds < NR_PRDS);
		prdt[nr_prds].vp_addr = ps->pad_phys + lead;
		prdt[nr_prds].vp_size = trail;
		nr_prds++;
	}

	return nr_prds;
}

/*===========================================================================*
 *				port_transfer				     *
 *===========================================================================*/
static ssize_t port_transfer(struct port_state *ps, u64_t pos, u64_t eof,
	endpoint_t endpt, iovec_s_t *iovec, int nr_req, int write, int flags)
{
	/* Perform an I/O transfer on a port.
	 */
	prd_t prdt[NR_PRDS];
	vir_bytes size, lead;
	unsigned int count, nr_prds;
	u64_t start_lba;
	int r, cmd;

	/* Get the total request size from the I/O vector. */
	if ((r = sum_iovec(ps, endpt, iovec, nr_req, &size)) != OK)
		return r;

	dprintf(V_REQ, ("%s: %s for %lu bytes at pos %llx\n",
		ahci_portname(ps), write ? "write" : "read", size, pos));

	assert(ps->state == STATE_GOOD_DEV);
	assert(ps->flags & FLAG_HAS_MEDIUM);
	assert(ps->sector_size > 0);

	/* Limit the maximum size of a single transfer.
	 * See the comments at the top of this file for details.
	 */
	if (size > MAX_TRANSFER)
		size = MAX_TRANSFER;

	/* If necessary, reduce the request size so that the request does not
	 * extend beyond the end of the partition. The caller already
	 * guarantees that the starting position lies within the partition.
	 */
	if (pos + size > eof)
		size = (vir_bytes) (eof - pos);

	start_lba = pos / ps->sector_size;
	lead = (vir_bytes) (pos % ps->sector_size);
	count = (lead + size + ps->sector_size - 1) / ps->sector_size;
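	/* For example, a 100-byte read at position 1000 on a medium with
	 * 512-byte sectors yields start_lba = 1, lead = 488, and count =
	 * (488 + 100 + 511) / 512 = 2 sectors.
	 */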

	/* Position must be word-aligned for read requests, and sector-aligned
	 * for write requests. We do not support read-modify-write for writes.
	 */
	if ((lead & 1) || (write && lead != 0)) {
		dprintf(V_ERR, ("%s: unaligned position from %d\n",
			ahci_portname(ps), endpt));
		return EINVAL;
	}

	/* Write requests must be sector-aligned. Word alignment of the size is
	 * already guaranteed by sum_iovec().
	 */
	if (write && (size % ps->sector_size) != 0) {
		dprintf(V_ERR, ("%s: unaligned size %lu from %d\n",
			ahci_portname(ps), size, endpt));
		return EINVAL;
	}

	/* Create a vector of physical addresses and sizes for the transfer. */
	nr_prds = r = setup_prdt(ps, endpt, iovec, nr_req, size, lead, write,
		prdt);

	if (r < 0) return r;

	/* Perform the actual transfer. */
	cmd = port_find_cmd(ps);

	if (ps->flags & FLAG_ATAPI)
		r = atapi_transfer(ps, cmd, start_lba, count, write, prdt,
			nr_prds);
	else
		r = ata_transfer(ps, cmd, start_lba, count, write,
			!!(flags & BDEV_FORCEWRITE), prdt, nr_prds);

	if (r != OK) return r;

	return size;
}

/*===========================================================================*
 *				port_hardreset				     *
 *===========================================================================*/
static void port_hardreset(struct port_state *ps)
{
	/* Perform a port-level (hard) reset on the given port.
	 */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_INIT);

	micro_delay(COMRESET_DELAY * 1000);	/* COMRESET_DELAY is in ms */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_NONE);
}

/*===========================================================================*
 *				port_override				     *
 *===========================================================================*/
static void port_override(struct port_state *ps)
{
	/* Override the port's BSY and/or DRQ flags. This may only be done
	 * prior to starting the port.
	 */
	u32_t cmd;

	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_CLO);

	SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CLO),
		PORTREG_DELAY);

	dprintf(V_INFO, ("%s: overridden\n", ahci_portname(ps)));
}

/*===========================================================================*
 *				port_start				     *
 *===========================================================================*/
static void port_start(struct port_state *ps)
{
	/* Start the given port, allowing for the execution of commands and the
	 * transfer of data on that port.
	 */
	u32_t cmd;

	/* Reset status registers. */
	port_write(ps, AHCI_PORT_SERR, ~0);
	port_write(ps, AHCI_PORT_IS, ~0);

	/* Start the port. */
	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_ST);

	dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
}

/*===========================================================================*
 *				port_stop				     *
 *===========================================================================*/
static void port_stop(struct port_state *ps)
{
	/* Stop the given port, if not already stopped.
	 */
	u32_t cmd;

	cmd = port_read(ps, AHCI_PORT_CMD);

	if (cmd & (AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST)) {
		port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_ST);

		SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CR),
			PORTREG_DELAY);

		dprintf(V_INFO, ("%s: stopped\n", ahci_portname(ps)));
	}
}

/*===========================================================================*
 *				port_restart				     *
 *===========================================================================*/
static void port_restart(struct port_state *ps)
{
	/* Restart a port after a fatal error has occurred.
	 */

	/* Fail all outstanding commands. */
	port_fail_cmds(ps);

	/* Stop the port. */
	port_stop(ps);

	/* If the BSY and/or DRQ flags are set, reset the port. */
	if (port_read(ps, AHCI_PORT_TFD) &
		(AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ)) {

		dprintf(V_ERR, ("%s: port reset\n", ahci_portname(ps)));

		/* To keep this driver simple, we do not transparently recover
		 * ongoing requests. Instead, we mark the failing device as
		 * disconnected, and reset it. If the reset succeeds, the
		 * device (or, perhaps, eventually, another device) will come
		 * back up. Any current and future requests to this port will
		 * be failed until the port is fully closed and reopened.
		 */
		port_disconnect(ps);

		/* Trigger a port reset. */
		port_hardreset(ps);

		return;
	}

	/* Start the port. */
	port_start(ps);
}

/*===========================================================================*
 *				print_string				     *
 *===========================================================================*/
static void print_string(u16_t *buf, int start, int end)
{
	/* Print a string that is stored as little-endian words and padded with
	 * trailing spaces.
	 */
	int i, last = 0;

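	/* Each word holds two ASCII characters, with the first character in
	 * the high byte. A string of odd length ends with a lone trailing
	 * space in the low byte of its last word; in that case, only the high
	 * byte of that word must be printed.
	 */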
	while (end >= start && buf[end] == 0x2020) end--;

	if (end >= start && (buf[end] & 0xFF) == 0x20) end--, last++;

	for (i = start; i <= end; i++)
		printf("%c%c", buf[i] >> 8, buf[i] & 0xFF);

	if (last)
		printf("%c", buf[i] >> 8);
}

/*===========================================================================*
 *				port_id_check				     *
 *===========================================================================*/
static void port_id_check(struct port_state *ps, int success)
{
	/* The device identification command has either completed or timed out.
	 * Decide whether this device is usable or not, and store some of its
	 * properties.
	 */
	u16_t *buf;

	assert(ps->state == STATE_WAIT_ID);

	ps->flags &= ~FLAG_BUSY;
	cancel_timer(&ps->cmd_info[0].timer);

	if (!success) {
		if (!(ps->flags & FLAG_ATAPI) &&
			port_read(ps, AHCI_PORT_SIG) != ATA_SIG_ATA) {
			dprintf(V_INFO, ("%s: may not be ATA, trying ATAPI\n",
				ahci_portname(ps)));

			ps->flags |= FLAG_ATAPI;

			(void) gen_identify(ps, FALSE /*blocking*/);
			return;
		}

		dprintf(V_ERR,
			("%s: unable to identify\n", ahci_portname(ps)));
	}

	/* If the identify command itself succeeded, check the results and
	 * store some properties.
	 */
	if (success) {
		buf = (u16_t *) ps->tmp_base;

		if (ps->flags & FLAG_ATAPI)
			success = atapi_id_check(ps, buf);
		else
			success = ata_id_check(ps, buf);
	}

	/* If the device has not been identified successfully, mark it as an
	 * unusable device.
	 */
	if (!success) {
		port_stop(ps);

		ps->state = STATE_BAD_DEV;
		port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);

		return;
	}

	/* The device has been identified successfully, and is hence usable. */
1410 ps->state = STATE_GOOD_DEV;
1411
1412 /* Print some information about the device. */
1413 if (ahci_verbose >= V_INFO) {
1414 printf("%s: ATA%s, ", ahci_portname(ps),
1415 (ps->flags & FLAG_ATAPI) ? "PI" : "");
1416 print_string(buf, 27, 46);
1417 if (ahci_verbose >= V_DEV) {
1418 printf(" (");
1419 print_string(buf, 10, 19);
1420 printf(", ");
1421 print_string(buf, 23, 26);
1422 printf(")");
1423 }
1424
1425 if (ps->flags & FLAG_HAS_MEDIUM)
1426 printf(", %u byte sectors, %llu MB size",
1427 ps->sector_size,
1428 ps->lba_count * ps->sector_size / (1024*1024));
1429
1430 printf("\n");
1431 }
1432 }
1433
1434 /*===========================================================================*
1435 * port_connect *
1436 *===========================================================================*/
port_connect(struct port_state * ps)1437 static void port_connect(struct port_state *ps)
1438 {
1439 /* A device has been found to be attached to this port. Start the port,
1440 * and do timed polling for its signature to become available.
1441 */
1442 u32_t status, sig;
1443
1444 dprintf(V_INFO, ("%s: device connected\n", ahci_portname(ps)));
1445
1446 port_start(ps);
1447
1448 /* The next check covers a purely hypothetical race condition, where
1449 * the device would disappear right before we try to start it. This is
1450 * possible because we have to clear PxSERR, and with that, the DIAG.N
1451 * bit. Double-check the port status, and if it is not as we expect,
1452 * infer a disconnection.
1453 */
1454 status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;
1455
1456 if (status != AHCI_PORT_SSTS_DET_PHY) {
1457 dprintf(V_ERR, ("%s: device vanished!\n", ahci_portname(ps)));
1458
1459 port_stop(ps);
1460
1461 ps->state = STATE_NO_DEV;
1462 ps->flags &= ~FLAG_BUSY;
1463
1464 return;
1465 }
1466
1467 /* Clear all state flags except the busy flag, which may be relevant if
1468 * a BDEV_OPEN call is waiting for the device to become ready; the
1469 * barrier flag, which prevents access to the device until it is
1470 * completely closed and (re)opened; and, the thread suspension flag.
1471 */
1472 ps->flags &= (FLAG_BUSY | FLAG_BARRIER | FLAG_SUSPENDED);
1473
1474 /* Check the port's signature. We only use the signature to speed up
1475 * identification; we will try both ATA and ATAPI if the signature is
1476 * neither ATA nor ATAPI.
1477 */
1478 sig = port_read(ps, AHCI_PORT_SIG);
1479
1480 if (sig == ATA_SIG_ATAPI)
1481 ps->flags |= FLAG_ATAPI;
1482
1483 /* Attempt to identify the device. Do this using continuation, because
1484 * we may already be called from port_wait() here, and could end up
1485 * confusing the timer expiration procedure.
1486 */
1487 ps->state = STATE_WAIT_ID;
1488 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_MASK);
1489
1490 (void) gen_identify(ps, FALSE /*blocking*/);
1491 }
1492
1493 /*===========================================================================*
1494 * port_disconnect *
1495 *===========================================================================*/
port_disconnect(struct port_state * ps)1496 static void port_disconnect(struct port_state *ps)
1497 {
1498 /* The device has detached from this port. It has already been stopped.
1499 */
1500
1501 dprintf(V_INFO, ("%s: device disconnected\n", ahci_portname(ps)));
1502
1503 ps->state = STATE_NO_DEV;
1504 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE);
1505 ps->flags &= ~FLAG_BUSY;
1506
1507 /* Fail any ongoing request. The caller may already have done this. */
1508 port_fail_cmds(ps);
1509
1510 /* Block any further access until the device is completely closed and
1511 * reopened. This prevents arbitrary I/O to a newly plugged-in device
1512 * without upper layers noticing.
1513 */
1514 ps->flags |= FLAG_BARRIER;
1515
1516 /* Inform the blockdriver library to reduce the number of threads. */
1517 blockdriver_mt_set_workers(ps->device, 1);
1518 }
1519
1520 /*===========================================================================*
1521 * port_dev_check *
1522 *===========================================================================*/
port_dev_check(struct port_state * ps)1523 static void port_dev_check(struct port_state *ps)
1524 {
1525 /* Perform device detection by means of polling.
1526 */
1527 u32_t status, tfd;
1528
1529 assert(ps->state == STATE_WAIT_DEV);
1530
1531 status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;
1532
1533 dprintf(V_DEV, ("%s: polled status %u\n", ahci_portname(ps), status));
1534
1535 switch (status) {
1536 case AHCI_PORT_SSTS_DET_PHY:
1537 tfd = port_read(ps, AHCI_PORT_TFD);
1538
1539 /* If a Phy connection has been established, and the BSY and
1540 * DRQ flags are cleared, the device is ready.
1541 */
1542 if (!(tfd & (AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ))) {
1543 port_connect(ps);
1544
1545 return;
1546 }
1547
1548 /* fall-through */
1549 case AHCI_PORT_SSTS_DET_DET:
1550 /* A device has been detected, but it is not ready yet. Try for
1551 * a while before giving up. This may take seconds.
1552 */
1553 if (ps->left > 0) {
1554 ps->left--;
1555 set_timer(&ps->cmd_info[0].timer, ahci_device_delay,
1556 port_timeout, BUILD_ARG(ps - port_state, 0));
1557 return;
1558 }
1559 }
1560
1561 dprintf(V_INFO, ("%s: device not ready\n", ahci_portname(ps)));
1562
1563 /* We get here on timeout, or when the HBA reports that there is no
1564 * device present at all. In either case, we change to another state.
1565 */
1566 if (status == AHCI_PORT_SSTS_DET_PHY) {
1567 /* Some devices may not correctly clear BSY/DRQ. Upon timeout,
1568 * if we can override these flags, do so and start the
1569 * identification process anyway.
1570 */
1571 if (hba_state.has_clo) {
1572 port_override(ps);
1573
1574 port_connect(ps);
1575
1576 return;
1577 }
1578
1579 /* A device is present and initialized, but not ready. */
1580 ps->state = STATE_BAD_DEV;
1581 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
1582 } else {
1583 /* A device may or may not be present, but it does not appear
1584 * to be ready in any case. Ignore it until the next device
1585 * initialization event.
1586 */
1587 ps->state = STATE_NO_DEV;
1588 ps->flags &= ~FLAG_BUSY;
1589 }
1590 }
1591
1592 /*===========================================================================*
1593 * port_intr *
1594 *===========================================================================*/
1595 static void port_intr(struct port_state *ps)
1596 {
1597 /* Process an interrupt on this port.
1598 */
1599 u32_t smask, emask;
1600 int success;
1601
1602 if (ps->state == STATE_NO_PORT) {
1603 dprintf(V_ERR, ("%s: interrupt for invalid port!\n",
1604 ahci_portname(ps)));
1605
1606 return;
1607 }
1608
1609 smask = port_read(ps, AHCI_PORT_IS);
1610 emask = smask & port_read(ps, AHCI_PORT_IE);
1611
1612 /* Clear the interrupt flags that we saw were set. */
1613 port_write(ps, AHCI_PORT_IS, smask);
1614
1615 dprintf(V_REQ, ("%s: interrupt (%08x)\n", ahci_portname(ps), smask));
1616
1617 /* Check if any commands have completed. */
1618 port_check_cmds(ps);
1619
1620 if (emask & AHCI_PORT_IS_PCS) {
1621 /* Clear the X diagnostics bit to clear this interrupt. */
1622 port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_X);
1623
1624 dprintf(V_DEV, ("%s: device attached\n", ahci_portname(ps)));
1625
1626 switch (ps->state) {
1627 case STATE_SPIN_UP:
1628 case STATE_NO_DEV:
1629 /* Reportedly, a device has shown up. Start polling its
1630 * status until it has become ready.
1631 */
1632
1633 if (ps->state == STATE_SPIN_UP)
1634 cancel_timer(&ps->cmd_info[0].timer);
1635
1636 ps->state = STATE_WAIT_DEV;
1637 ps->left = ahci_device_checks;
1638
1639 port_dev_check(ps);
1640
1641 break;
1642
1643 case STATE_WAIT_DEV:
1644 /* Nothing else to do. */
1645 break;
1646
1647 default:
1648 /* Impossible. */
1649 assert(0);
1650 }
1651 } else if (emask & AHCI_PORT_IS_PRCS) {
1652 /* Clear the N diagnostics bit to clear this interrupt. */
1653 port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_N);
1654
1655 dprintf(V_DEV, ("%s: device detached\n", ahci_portname(ps)));
1656
1657 switch (ps->state) {
1658 case STATE_WAIT_ID:
1659 case STATE_GOOD_DEV:
1660 /* The device is no longer ready. Stop the port, cancel
1661 * ongoing requests, and disconnect the device.
1662 */
1663 port_stop(ps);
1664
1665 /* fall-through */
1666 case STATE_BAD_DEV:
1667 port_disconnect(ps);
1668
1669 /* The device has become unusable to us at this point.
1670 * Reset the port to make sure that once the device (or
1671 * another device) becomes usable again, we will get a
1672 * PCS interrupt as well.
1673 */
1674 port_hardreset(ps);
1675
1676 break;
1677
1678 default:
1679 /* Impossible. */
1680 assert(0);
1681 }
1682 } else if (smask & AHCI_PORT_IS_MASK) {
1683 /* We assume that any other interrupt indicates command
1684 * completion or (command or device) failure. Unfortunately, if
1685 * an NCQ command failed, we cannot easily determine which one
1686 * it was. For that reason, after completing all successfully
1687 * finished commands (above), we fail all other outstanding
1688 * commands and restart the port. This can possibly be improved
1689 * later by obtaining per-command status results from the HBA.
1690 */
1691
1692 success = !(port_read(ps, AHCI_PORT_TFD) &
1693 (AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF));
1694
1695 /* Check now for failure. There are fatal failures, and there
1696 * are failures that set the TFD.STS.ERR field using a D2H
1697 * FIS. In both cases, we just restart the port, failing all
1698 * commands in the process.
1699 */
1700 		if (!success || (smask & AHCI_PORT_IS_RESTART)) {
1701 			port_restart(ps);
1702 		}
1705
1706 /* If we were waiting for ID verification, check now. */
1707 if (ps->state == STATE_WAIT_ID)
1708 port_id_check(ps, success);
1709 }
1710 }
1711
1712 /*===========================================================================*
1713 * port_timeout *
1714 *===========================================================================*/
1715 static void port_timeout(int arg)
1716 {
1717 /* A timeout has occurred on this port. Figure out what the timeout is
1718 * for, and take appropriate action.
1719 */
1720 struct port_state *ps;
1721 int port, cmd;
1722
1723 port = GET_PORT(arg);
1724 cmd = GET_TAG(arg);
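/* BUILD_ARG() packs a port index and a command tag into the single integer
 * argument that is passed along with the timer; GET_PORT() and GET_TAG()
 * unpack it again. A minimal sketch of what these macros could look like
 * (the actual definitions live in the driver's header and may differ):
 *
 *   #define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
 *   #define GET_PORT(arg)		((arg) >> 8)
 *   #define GET_TAG(arg)		((arg) & 0xFF)
 */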
1725
1726 assert(port >= 0 && port < hba_state.nr_ports);
1727
1728 ps = &port_state[port];
1729
1730 /* Regardless of the outcome of this timeout, wake up the thread if it
1731 * is suspended. This applies only during initialization.
1732 */
1733 if (ps->flags & FLAG_SUSPENDED) {
1734 assert(cmd == 0);
1735 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
1736 }
1737
1738 /* If detection of a device after startup timed out, give up on initial
1739 * detection and only look for hot plug events from now on.
1740 */
1741 if (ps->state == STATE_SPIN_UP) {
1742 /* One exception: if the PCS interrupt bit is set here, then we
1743 * are probably running on VirtualBox, which is currently not
1744 * always raising interrupts when setting interrupt bits (!).
1745 */
1746 if (port_read(ps, AHCI_PORT_IS) & AHCI_PORT_IS_PCS) {
1747 dprintf(V_INFO, ("%s: bad controller, no interrupt\n",
1748 ahci_portname(ps)));
1749
1750 ps->state = STATE_WAIT_DEV;
1751 ps->left = ahci_device_checks;
1752
1753 port_dev_check(ps);
1754
1755 return;
1756 } else {
1757 dprintf(V_INFO, ("%s: spin-up timeout\n",
1758 ahci_portname(ps)));
1759
1760 /* If the busy flag is set, a BDEV_OPEN request is
1761 * waiting for the detection to finish; clear the busy
1762 * flag to return an error to the caller.
1763 */
1764 ps->state = STATE_NO_DEV;
1765 ps->flags &= ~FLAG_BUSY;
1766 }
1767
1768 return;
1769 }
1770
1771 /* If we are waiting for a device to become connected and initialized,
1772 * check now.
1773 */
1774 if (ps->state == STATE_WAIT_DEV) {
1775 port_dev_check(ps);
1776
1777 return;
1778 }
1779
1780 dprintf(V_ERR, ("%s: timeout\n", ahci_portname(ps)));
1781
1782 /* Restart the port, failing all current commands. */
1783 port_restart(ps);
1784
1785 /* Finish up the identify operation. */
1786 if (ps->state == STATE_WAIT_ID)
1787 port_id_check(ps, FALSE);
1788 }
1789
1790 /*===========================================================================*
1791 * port_wait *
1792 *===========================================================================*/
1793 static void port_wait(struct port_state *ps)
1794 {
1795 /* Suspend the current thread until the given port is no longer busy,
1796 * due to either command completion or timeout.
1797 */
1798
1799 ps->flags |= FLAG_SUSPENDED;
1800
1801 while (ps->flags & FLAG_BUSY)
1802 blockdriver_mt_sleep();
1803
1804 ps->flags &= ~FLAG_SUSPENDED;
1805 }
1806
1807 /*===========================================================================*
1808 * port_issue *
1809 *===========================================================================*/
1810 static void port_issue(struct port_state *ps, int cmd, clock_t timeout)
1811 {
1812 /* Issue a command to the port, and set a timer to trigger a timeout
1813 * if the command takes too long to complete.
1814 */
1815
1816 /* Set the corresponding NCQ command bit, if applicable. */
1817 if (ps->flags & FLAG_HAS_NCQ)
1818 port_write(ps, AHCI_PORT_SACT, 1 << cmd);
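/* (The AHCI specification requires that, for an NCQ command, the PxSACT bit
 * be set before the corresponding PxCI bit; hence the ordering of the two
 * register writes in this function.)
 */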
1819
1820 /* Make sure that the compiler does not delay any previous write
1821 * operations until after the write to the command issue register.
1822 */
1823 __insn_barrier();
1824
1825 /* Tell the controller that a new command is ready. */
1826 port_write(ps, AHCI_PORT_CI, 1 << cmd);
1827
1828 /* Update pending commands. */
1829 ps->pend_mask |= 1 << cmd;
1830
1831 /* Set a timer in case the command does not complete at all. */
1832 set_timer(&ps->cmd_info[cmd].timer, timeout, port_timeout,
1833 BUILD_ARG(ps - port_state, cmd));
1834 }
1835
1836 /*===========================================================================*
1837 * port_exec *
1838 *===========================================================================*/
1839 static int port_exec(struct port_state *ps, int cmd, clock_t timeout)
1840 {
1841 /* Execute a command on a port, wait for the command to complete or for
1842 * a timeout, and return whether the command succeeded or not.
1843 */
1844
1845 port_issue(ps, cmd, timeout);
1846
1847 /* Put the thread to sleep until a timeout or a command completion
1848 * happens. We do not use port_wait() here: its suspension flag works
1849 * on a per-port basis, whereas command completion must be tracked on
1850 * a per-thread, and hence per-tag, basis. That call is now used only
1851 * to defer open requests during device/driver initialization.
1852 * Instead, we register the current thread and then put it to sleep
1853 * directly.
1854 */
1855 ps->cmd_info[cmd].tid = blockdriver_mt_get_tid();
1856
1857 blockdriver_mt_sleep();
1858
1859 /* Cancelling a timer that just triggered does no harm. */
1860 cancel_timer(&ps->cmd_info[cmd].timer);
1861
1862 assert(!(ps->flags & FLAG_BUSY));
1863
1864 dprintf(V_REQ, ("%s: end of command -- %s\n", ahci_portname(ps),
1865 (ps->cmd_info[cmd].result == RESULT_FAILURE) ?
1866 "failure" : "success"));
1867
1868 if (ps->cmd_info[cmd].result == RESULT_FAILURE)
1869 return EIO;
1870
1871 return OK;
1872 }
1873
1874 /*===========================================================================*
1875 * port_alloc *
1876 *===========================================================================*/
1877 static void port_alloc(struct port_state *ps)
1878 {
1879 /* Allocate memory for the given port, and enable FIS receipt. We try
1880 * to cram everything into one 4K-page in order to limit memory usage
1881 * as much as possible. More memory may be allocated on demand later,
1882 * but allocation failure should be fatal only here. Note that we do
1883 * not allocate memory for sector padding here, because we do not know
1884 * the device's sector size yet.
1885 */
1886 size_t fis_off, tmp_off, ct_off, ct_offs[NR_CMDS];
1887 u32_t cmd;
1888 int i;
1889
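/* Each offset is rounded up to the alignment required for the structure
 * placed at that offset: for a given offset x and alignment a, the value
 * x + a - 1 - ((x + a - 1) % a) is the smallest multiple of a that is at
 * least x. The resulting single-allocation layout is, roughly:
 *
 *   0           command list       (AHCI_CL_SIZE)
 *   fis_off     received-FIS area  (AHCI_FIS_SIZE)
 *   tmp_off     temporary buffer   (AHCI_TMP_SIZE)
 *   ct_offs[i]  command tables     (AHCI_CT_SIZE each, NR_CMDS in total)
 */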
1890 fis_off = AHCI_CL_SIZE + AHCI_FIS_SIZE - 1;
1891 fis_off -= fis_off % AHCI_FIS_SIZE;
1892
1893 tmp_off = fis_off + AHCI_FIS_SIZE + AHCI_TMP_ALIGN - 1;
1894 tmp_off -= tmp_off % AHCI_TMP_ALIGN;
1895
1896 /* Allocate memory for all the commands. */
1897 ct_off = tmp_off + AHCI_TMP_SIZE;
1898 for (i = 0; i < NR_CMDS; i++) {
1899 ct_off += AHCI_CT_ALIGN - 1;
1900 ct_off -= ct_off % AHCI_CT_ALIGN;
1901 ct_offs[i] = ct_off;
1902 ps->mem_size = ct_off + AHCI_CT_SIZE;
1903 ct_off = ps->mem_size;
1904 }
1905
1906 ps->mem_base = alloc_contig(ps->mem_size, AC_ALIGN4K, &ps->mem_phys);
1907 if (ps->mem_base == NULL)
1908 panic("unable to allocate port memory");
1909 memset(ps->mem_base, 0, ps->mem_size);
1910
1911 ps->cl_base = (u32_t *) ps->mem_base;
1912 ps->cl_phys = ps->mem_phys;
1913 assert(ps->cl_phys % AHCI_CL_SIZE == 0);
1914
1915 ps->fis_base = (u32_t *) (ps->mem_base + fis_off);
1916 ps->fis_phys = ps->mem_phys + fis_off;
1917 assert(ps->fis_phys % AHCI_FIS_SIZE == 0);
1918
1919 ps->tmp_base = (u8_t *) (ps->mem_base + tmp_off);
1920 ps->tmp_phys = ps->mem_phys + tmp_off;
1921 assert(ps->tmp_phys % AHCI_TMP_ALIGN == 0);
1922
1923 for (i = 0; i < NR_CMDS; i++) {
1924 ps->ct_base[i] = ps->mem_base + ct_offs[i];
1925 ps->ct_phys[i] = ps->mem_phys + ct_offs[i];
1926 assert(ps->ct_phys[i] % AHCI_CT_ALIGN == 0);
1927 }
1928
1929 /* Tell the controller about some of the physical addresses. */
1930 port_write(ps, AHCI_PORT_FBU, 0);
1931 port_write(ps, AHCI_PORT_FB, ps->fis_phys);
1932
1933 port_write(ps, AHCI_PORT_CLBU, 0);
1934 port_write(ps, AHCI_PORT_CLB, ps->cl_phys);
1935
1936 /* Enable FIS receive. */
1937 cmd = port_read(ps, AHCI_PORT_CMD);
1938 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_FRE);
1939
1940 ps->pad_base = NULL;
1941 ps->pad_size = 0;
1942 }
1943
1944 /*===========================================================================*
1945 * port_free *
1946 *===========================================================================*/
1947 static void port_free(struct port_state *ps)
1948 {
1949 /* Disable FIS receipt for the given port, and free previously
1950 * allocated memory.
1951 */
1952 u32_t cmd;
1953
1954 /* Disable FIS receive. */
1955 cmd = port_read(ps, AHCI_PORT_CMD);
1956
1957 if (cmd & (AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE)) {
1958 port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_FRE);
1959
1960 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_FR),
1961 PORTREG_DELAY);
1962 }
1963
1964 if (ps->pad_base != NULL)
1965 free_contig(ps->pad_base, ps->pad_size);
1966
1967 free_contig(ps->mem_base, ps->mem_size);
1968 }
1969
1970 /*===========================================================================*
1971 * port_init *
1972 *===========================================================================*/
1973 static void port_init(struct port_state *ps)
1974 {
1975 /* Initialize the given port.
1976 */
1977 u32_t cmd;
1978 int i;
1979
1980 /* Initialize the port state structure. */
1981 ps->queue_depth = 1;
1982 ps->state = STATE_SPIN_UP;
1983 ps->flags = FLAG_BUSY;
1984 ps->sector_size = 0;
1985 ps->open_count = 0;
1986 ps->pend_mask = 0;
1987 for (i = 0; i < NR_CMDS; i++)
1988 init_timer(&ps->cmd_info[i].timer);
1989
1990 ps->reg = (u32_t *) ((u8_t *) hba_state.base +
1991 AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * (ps - port_state));
1992
1993 /* Allocate memory for the port. */
1994 port_alloc(ps);
1995
1996 /* Just listen for device connection events for now. */
1997 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE);
1998
1999 /* Enable device spin-up for HBAs that support staggered spin-up.
2000 * This is a no-op for HBAs that do not support it.
2001 */
2002 cmd = port_read(ps, AHCI_PORT_CMD);
2003 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_SUD);
2004
2005 /* Trigger a port reset. */
2006 port_hardreset(ps);
2007
2008 set_timer(&ps->cmd_info[0].timer, ahci_spinup_timeout,
2009 port_timeout, BUILD_ARG(ps - port_state, 0));
2010 }
2011
2012 /*===========================================================================*
2013 * ahci_probe *
2014 *===========================================================================*/
2015 static int ahci_probe(int skip)
2016 {
2017 /* Find a matching PCI device.
2018 */
2019 int r, devind;
2020 u16_t vid, did;
2021
2022 pci_init();
2023
2024 r = pci_first_dev(&devind, &vid, &did);
2025 if (r <= 0)
2026 return -1;
2027
2028 while (skip--) {
2029 r = pci_next_dev(&devind, &vid, &did);
2030 if (r <= 0)
2031 return -1;
2032 }
2033
2034 pci_reserve(devind);
2035
2036 return devind;
2037 }
2038
2039 /*===========================================================================*
2040 * ahci_reset *
2041 *===========================================================================*/
2042 static void ahci_reset(void)
2043 {
2044 /* Reset the HBA. Do not enable AHCI mode afterwards.
2045 */
2046 u32_t ghc;
2047
2048 ghc = hba_read(AHCI_HBA_GHC);
2049
2050 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE);
2051
2052 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_HR);
2053
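/* (GHC.HR is cleared by the HBA itself once the reset has completed, which
 * is what the polling loop below waits for.)
 */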
2054 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR), RESET_DELAY);
2055
2056 if (hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR)
2057 panic("unable to reset HBA");
2058 }
2059
2060 /*===========================================================================*
2061 * ahci_init *
2062 *===========================================================================*/
2063 static void ahci_init(int devind)
2064 {
2065 /* Initialize the device.
2066 */
2067 u32_t base, size, cap, ghc, mask;
2068 int r, port, ioflag;
2069
2070 if ((r = pci_get_bar(devind, PCI_BAR_6, &base, &size, &ioflag)) != OK)
2071 panic("unable to retrieve BAR: %d", r);
2072
2073 if (ioflag)
2074 panic("invalid BAR type");
2075
2076 /* There must be at least one port, and at most NR_PORTS ports. Limit
2077 * the actual total number of ports to the size of the exposed area.
2078 */
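/* As an illustration: with the specification-defined register layout of
 * 0x100 bytes of generic HBA registers followed by 0x80 bytes of registers
 * per port, a BAR of 0x1100 bytes would expose the full 32 ports, assuming
 * AHCI_MEM_BASE_SIZE and AHCI_MEM_PORT_SIZE match those values.
 */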
2079 if (size < AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE)
2080 panic("HBA memory size too small: %u", size);
2081
2082 size = MIN(size, AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * NR_PORTS);
2083
2084 hba_state.nr_ports = (size - AHCI_MEM_BASE_SIZE) / AHCI_MEM_PORT_SIZE;
2085
2086 /* Map the register area into local memory. */
2087 hba_state.base = (u32_t *) vm_map_phys(SELF, (void *) base, size);
2088 hba_state.size = size;
2089 if (hba_state.base == MAP_FAILED)
2090 panic("unable to map HBA memory");
2091
2092 /* Retrieve, allocate and enable the controller's IRQ. */
2093 hba_state.irq = pci_attr_r8(devind, PCI_ILR);
2094 hba_state.hook_id = 0;
2095
2096 if ((r = sys_irqsetpolicy(hba_state.irq, 0, &hba_state.hook_id)) != OK)
2097 panic("unable to register IRQ: %d", r);
2098
2099 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2100 panic("unable to enable IRQ: %d", r);
2101
2102 /* Reset the HBA. */
2103 ahci_reset();
2104
2105 /* Enable AHCI and interrupts. */
2106 ghc = hba_read(AHCI_HBA_GHC);
2107 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE);
2108
2109 /* Limit the maximum number of commands to the controller's value. */
2111 cap = hba_read(AHCI_HBA_CAP);
2112 hba_state.has_ncq = !!(cap & AHCI_HBA_CAP_SNCQ);
2113 hba_state.has_clo = !!(cap & AHCI_HBA_CAP_SCLO);
2114 hba_state.nr_cmds = MIN(NR_CMDS,
2115 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1);
2116
2117 dprintf(V_INFO, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
2118 "%s queuing, IRQ %d\n",
2119 ahci_instance,
2120 (int) (hba_read(AHCI_HBA_VS) >> 16),
2121 (int) ((hba_read(AHCI_HBA_VS) >> 8) & 0xFF),
2122 (int) (hba_read(AHCI_HBA_VS) & 0xFF),
2123 ((cap >> AHCI_HBA_CAP_NP_SHIFT) & AHCI_HBA_CAP_NP_MASK) + 1,
2124 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1,
2125 hba_state.has_ncq ? "supports" : "no", hba_state.irq));
2126
2127 dprintf(V_INFO, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
2128 ahci_instance, cap, hba_read(AHCI_HBA_CAP2),
2129 hba_read(AHCI_HBA_PI)));
2130
2131 /* Initialize each of the implemented ports. We ignore CAP.NP. */
2132 mask = hba_read(AHCI_HBA_PI);
2133
2134 for (port = 0; port < hba_state.nr_ports; port++) {
2135 port_state[port].device = NO_DEVICE;
2136 port_state[port].state = STATE_NO_PORT;
2137
2138 if (mask & (1 << port))
2139 port_init(&port_state[port]);
2140 }
2141 }
2142
2143 /*===========================================================================*
2144 * ahci_stop *
2145 *===========================================================================*/
2146 static void ahci_stop(void)
2147 {
2148 /* Disable AHCI, and clean up resources to the extent possible.
2149 */
2150 struct port_state *ps;
2151 int r, port;
2152
2153 for (port = 0; port < hba_state.nr_ports; port++) {
2154 ps = &port_state[port];
2155
2156 if (ps->state != STATE_NO_PORT) {
2157 port_stop(ps);
2158
2159 port_free(ps);
2160 }
2161 }
2162
2163 ahci_reset();
2164
2165 if ((r = vm_unmap_phys(SELF, (void *) hba_state.base,
2166 hba_state.size)) != OK)
2167 panic("unable to unmap HBA memory: %d", r);
2168
2169 if ((r = sys_irqrmpolicy(&hba_state.hook_id)) != OK)
2170 panic("unable to deregister IRQ: %d", r);
2171 }
2172
2173 /*===========================================================================*
2174 * ahci_alarm *
2175 *===========================================================================*/
2176 static void ahci_alarm(clock_t stamp)
2177 {
2178 /* Process an alarm.
2179 */
2180
2181 /* Call the port-specific handler for each port that timed out. */
2182 expire_timers(stamp);
2183 }
2184
2185 /*===========================================================================*
2186 * ahci_intr *
2187 *===========================================================================*/
2188 static void ahci_intr(unsigned int UNUSED(mask))
2189 {
2190 /* Process an interrupt.
2191 */
2192 struct port_state *ps;
2193 u32_t mask;
2194 int r, port;
2195
2196 /* Handle an interrupt for each port that has the interrupt bit set. */
2197 mask = hba_read(AHCI_HBA_IS);
2198
2199 for (port = 0; port < hba_state.nr_ports; port++) {
2200 if (mask & (1 << port)) {
2201 ps = &port_state[port];
2202
2203 port_intr(ps);
2204
2205 /* After processing an interrupt, wake up the device
2206 * thread if it is suspended and now no longer busy.
2207 */
2208 if ((ps->flags & (FLAG_SUSPENDED | FLAG_BUSY)) ==
2209 FLAG_SUSPENDED)
2210 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
2211 }
2212 }
2213
2214 /* Clear the bits that we processed. */
2215 hba_write(AHCI_HBA_IS, mask);
2216
2217 /* Reenable the interrupt. */
2218 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2219 panic("unable to enable IRQ: %d", r);
2220 }
2221
2222 /*===========================================================================*
2223 * ahci_get_params *
2224 *===========================================================================*/
2225 static void ahci_get_params(void)
2226 {
2227 /* Retrieve and parse parameters passed to this driver, except the
2228 * device-to-port mapping, which has to be parsed later.
2229 */
2230 long v;
2231 unsigned int i;
2232
2233 /* Find out which driver instance we are. */
2234 v = 0;
2235 (void) env_parse("instance", "d", 0, &v, 0, 255);
2236 ahci_instance = (int) v;
2237
2238 /* Initialize the verbosity level. */
2239 v = V_ERR;
2240 (void) env_parse("ahci_verbose", "d", 0, &v, V_NONE, V_REQ);
2241 ahci_verbose = (int) v;
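/* For example, passing ahci_verbose=4 as a boot parameter would select the
 * most verbose level, assuming the V_* constants range from V_NONE = 0 up
 * to V_REQ = 4; by default, only errors are reported.
 */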
2242
2243 /* Initialize timeout-related values. */
2244 for (i = 0; i < sizeof(ahci_timevar) / sizeof(ahci_timevar[0]); i++) {
2245 v = ahci_timevar[i].default_ms;
2246
2247 (void) env_parse(ahci_timevar[i].name, "d", 0, &v, 1,
2248 LONG_MAX);
2249
2250 *ahci_timevar[i].ptr = millis_to_hz(v);
2251 }
2252
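/* The number of detection polls is the device timeout divided by the
 * per-poll delay, rounded up: (t + d - 1) / d computes ceil(t / d) in
 * integer arithmetic.
 */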
2253 ahci_device_delay = millis_to_hz(DEVICE_DELAY);
2254 ahci_device_checks = (ahci_device_timeout + ahci_device_delay - 1) /
2255 ahci_device_delay;
2256 }
2257
2258 /*===========================================================================*
2259 * ahci_set_mapping *
2260 *===========================================================================*/
2261 static void ahci_set_mapping(void)
2262 {
2263 /* Construct a mapping from device nodes to port numbers.
2264 */
2265 char key[16], val[32], *p;
2266 unsigned int port;
2267 int i, j;
2268
2269 /* Start off with a mapping that includes implemented ports only, in
2270 * order. We choose this mapping over an identity mapping to maximize
2271 * the chance that the user will be able to access the first MAX_DRIVES
2272 * devices. Note that we can only do this after initializing the HBA.
2273 */
2274 for (i = j = 0; i < NR_PORTS && j < MAX_DRIVES; i++)
2275 if (port_state[i].state != STATE_NO_PORT)
2276 ahci_map[j++] = i;
2277
2278 for ( ; j < MAX_DRIVES; j++)
2279 ahci_map[j] = NO_PORT;
2280
2281 /* See if the user specified a custom mapping. Unlike all other
2282 * configuration options, this is a per-instance setting.
2283 */
2284 strlcpy(key, "ahci0_map", sizeof(key));
2285 key[4] += ahci_instance;
2286
2287 if (env_get_param(key, val, sizeof(val)) == OK) {
2288 /* Parse the mapping, which is assumed to be a comma-separated
2289 * list of zero-based port numbers.
2290 */
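/* For example, ahci0_map=2,0,1 would map disk 0 to port 2, disk 1 to
 * port 0, and disk 2 to port 1, leaving any remaining disks unmapped.
 */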
2291 p = val;
2292
2293 for (i = 0; i < MAX_DRIVES; i++) {
2294 if (*p) {
2295 port = (unsigned int) strtoul(p, &p, 0);
2296
2297 if (*p) p++;
2298
2299 ahci_map[i] = port % NR_PORTS;
2300 }
2301 else ahci_map[i] = NO_PORT;
2302 }
2303 }
2304
2305 /* Create a reverse mapping. */
2306 for (i = 0; i < MAX_DRIVES; i++)
2307 if ((j = ahci_map[i]) != NO_PORT)
2308 port_state[j].device = i;
2309 }
2310
2311 /*===========================================================================*
2312 * sef_cb_init_fresh *
2313 *===========================================================================*/
2314 static int sef_cb_init_fresh(int type, sef_init_info_t *UNUSED(info))
2315 {
2316 /* Initialize the driver.
2317 */
2318 int devind;
2319
2320 /* Get command line parameters. */
2321 ahci_get_params();
2322
2323 /* Probe for recognized devices, skipping matches as appropriate. */
2324 devind = ahci_probe(ahci_instance);
2325
2326 if (devind < 0)
2327 panic("no matching device found");
2328
2329 /* Initialize the device we found. */
2330 ahci_init(devind);
2331
2332 /* Create a mapping from device nodes to port numbers. */
2333 ahci_set_mapping();
2334
2335 /* Announce that we are up. */
2336 blockdriver_announce(type);
2337
2338 return OK;
2339 }
2340
2341 /*===========================================================================*
2342 * sef_cb_signal_handler *
2343 *===========================================================================*/
2344 static void sef_cb_signal_handler(int signo)
2345 {
2346 /* In case of a termination signal, shut down this driver.
2347 */
2348 int port;
2349
2350 if (signo != SIGTERM) return;
2351
2352 /* If any ports are still open, assume that the system is being shut
2353 * down, and stay up until the last device has been closed.
2354 */
2355 ahci_exiting = TRUE;
2356
2357 for (port = 0; port < hba_state.nr_ports; port++)
2358 if (port_state[port].open_count > 0)
2359 return;
2360
2361 /* If not, stop the driver and exit immediately. */
2362 ahci_stop();
2363
2364 exit(0);
2365 }
2366
2367 /*===========================================================================*
2368 * sef_local_startup *
2369 *===========================================================================*/
2370 static void sef_local_startup(void)
2371 {
2372 /* Set callbacks and initialize the System Event Framework (SEF).
2373 */
2374
2375 /* Register init callbacks. */
2376 sef_setcb_init_fresh(sef_cb_init_fresh);
2377
2378 /* Register signal callbacks. */
2379 sef_setcb_signal_handler(sef_cb_signal_handler);
2380
2381 /* Enable support for live update. */
2382 blockdriver_mt_support_lu();
2383
2384 /* Let SEF perform startup. */
2385 sef_startup();
2386 }
2387
2388 /*===========================================================================*
2389 * ahci_portname *
2390 *===========================================================================*/
2391 static char *ahci_portname(struct port_state *ps)
2392 {
2393 /* Return a printable name for the given port. Whenever we can, print a
2394 * "Dx" device number rather than a "Pxx" port number, because the user
2395 * may not be aware of the mapping currently in use.
2396 */
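/* For instance, the second device on the first controller prints as
 * "AHCI0-D1", whereas an unmapped third port prints as "AHCI0-P02".
 */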
2397 static char name[] = "AHCI0-P00";
2398
2399 name[4] = '0' + ahci_instance;
2400
2401 if (ps->device == NO_DEVICE) {
2402 name[6] = 'P';
2403 name[7] = '0' + (ps - port_state) / 10;
2404 name[8] = '0' + (ps - port_state) % 10;
2405 }
2406 else {
2407 name[6] = 'D';
2408 name[7] = '0' + ps->device;
2409 name[8] = 0;
2410 }
2411
2412 return name;
2413 }
2414
2415 /*===========================================================================*
2416 * ahci_map_minor *
2417 *===========================================================================*/
2418 static struct port_state *ahci_map_minor(devminor_t minor, struct device **dvp)
2419 {
2420 /* Map a minor device number to a port and a pointer to the partition's
2421 * device structure. Return NULL if this minor device number does not
2422 * identify an actual device.
2423 */
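/* As an illustration: with the usual MINIX numbering, where DEV_PER_DRIVE
 * is 5 (one whole-drive device plus four primary partitions), minor 7 would
 * select drive 1 and partition entry 2; this assumes that layout, which is
 * defined outside this driver.
 */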
2424 struct port_state *ps;
2425 int port;
2426
2427 ps = NULL;
2428
2429 if (minor >= 0 && minor < NR_MINORS) {
2430 port = ahci_map[minor / DEV_PER_DRIVE];
2431
2432 if (port == NO_PORT)
2433 return NULL;
2434
2435 ps = &port_state[port];
2436 *dvp = &ps->part[minor % DEV_PER_DRIVE];
2437 }
2438 else if ((unsigned) (minor -= MINOR_d0p0s0) < NR_SUBDEVS) {
2439 port = ahci_map[minor / SUB_PER_DRIVE];
2440
2441 if (port == NO_PORT)
2442 return NULL;
2443
2444 ps = &port_state[port];
2445 *dvp = &ps->subpart[minor % SUB_PER_DRIVE];
2446 }
2447
2448 return ps;
2449 }
2450
2451 /*===========================================================================*
2452 * ahci_part *
2453 *===========================================================================*/
2454 static struct device *ahci_part(devminor_t minor)
2455 {
2456 /* Return a pointer to the partition information structure of the given
2457 * minor device.
2458 */
2459 struct device *dv;
2460
2461 if (ahci_map_minor(minor, &dv) == NULL)
2462 return NULL;
2463
2464 return dv;
2465 }
2466
2467 /*===========================================================================*
2468 * ahci_open *
2469 *===========================================================================*/
2470 static int ahci_open(devminor_t minor, int access)
2471 {
2472 /* Open a device.
2473 */
2474 struct port_state *ps;
2475 int r;
2476
2477 ps = ahci_get_port(minor);
2478
2479 /* Only one open request can be processed at a time, because open is
2480 * an exclusive operation. The thread that handles this call can
2481 * therefore freely register itself at slot zero.
2482 */
2483 ps->cmd_info[0].tid = blockdriver_mt_get_tid();
2484
2485 /* If we are still in the process of initializing this port or device,
2486 * wait for completion of that phase first.
2487 */
2488 if (ps->flags & FLAG_BUSY)
2489 port_wait(ps);
2490
2491 /* The device may only be opened if it is now properly functioning. */
2492 if (ps->state != STATE_GOOD_DEV)
2493 return ENXIO;
2494
2495 /* Some devices may only be opened in read-only mode. */
2496 if ((ps->flags & FLAG_READONLY) && (access & BDEV_W_BIT))
2497 return EACCES;
2498
2499 if (ps->open_count == 0) {
2500 /* The first open request. Clear the barrier flag, if set. */
2501 ps->flags &= ~FLAG_BARRIER;
2502
2503 /* Recheck media only when nobody is using the device. */
2504 if ((ps->flags & FLAG_ATAPI) &&
2505 (r = atapi_check_medium(ps, 0)) != OK)
2506 return r;
2507
2508 /* After rechecking the media, the partition table must always
2509 * be read. This is also a convenient time to do it for
2510 * nonremovable devices. Start by resetting the partition
2511 * tables and setting the working size of the entire device.
2512 */
2513 memset(ps->part, 0, sizeof(ps->part));
2514 memset(ps->subpart, 0, sizeof(ps->subpart));
2515
2516 ps->part[0].dv_size = ps->lba_count * ps->sector_size;
2517
2518 partition(&ahci_dtab, ps->device * DEV_PER_DRIVE, P_PRIMARY,
2519 !!(ps->flags & FLAG_ATAPI));
2520
2521 blockdriver_mt_set_workers(ps->device, ps->queue_depth);
2522 }
2523 else {
2524 /* If the barrier flag is set, deny new open requests until the
2525 * device is fully closed first.
2526 */
2527 if (ps->flags & FLAG_BARRIER)
2528 return ENXIO;
2529 }
2530
2531 ps->open_count++;
2532
2533 return OK;
2534 }
2535
2536 /*===========================================================================*
2537 * ahci_close *
2538 *===========================================================================*/
2539 static int ahci_close(devminor_t minor)
2540 {
2541 /* Close a device.
2542 */
2543 struct port_state *ps;
2544 int port;
2545
2546 ps = ahci_get_port(minor);
2547
2548 /* Decrease the open count. */
2549 if (ps->open_count <= 0) {
2550 dprintf(V_ERR, ("%s: closing already-closed port\n",
2551 ahci_portname(ps)));
2552
2553 return EINVAL;
2554 }
2555
2556 ps->open_count--;
2557
2558 if (ps->open_count > 0)
2559 return OK;
2560
2561 /* The device is now fully closed. That also means that the threads for
2562 * this device are not needed anymore, so we reduce the count to one.
2563 */
2564 blockdriver_mt_set_workers(ps->device, 1);
2565
2566 if (ps->state == STATE_GOOD_DEV && !(ps->flags & FLAG_BARRIER)) {
2567 dprintf(V_INFO, ("%s: flushing write cache\n",
2568 ahci_portname(ps)));
2569
2570 (void) gen_flush_wcache(ps);
2571 }
2572
2573 /* If the entire driver has been told to terminate, check whether all
2574 * devices are now closed. If so, tell libblockdriver to quit after
2575 * replying to the close request.
2576 */
2577 if (ahci_exiting) {
2578 for (port = 0; port < hba_state.nr_ports; port++)
2579 if (port_state[port].open_count > 0)
2580 break;
2581
2582 if (port == hba_state.nr_ports) {
2583 ahci_stop();
2584
2585 blockdriver_mt_terminate();
2586 }
2587 }
2588
2589 return OK;
2590 }
2591
2592 /*===========================================================================*
2593 * ahci_transfer *
2594 *===========================================================================*/
2595 static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position,
2596 endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags)
2597 {
2598 /* Perform data transfer on the selected device.
2599 */
2600 struct port_state *ps;
2601 struct device *dv;
2602 u64_t pos, eof;
2603
2604 ps = ahci_get_port(minor);
2605 dv = ahci_part(minor);
2606
2607 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2608 return EIO;
2609
2610 if (count > NR_IOREQS)
2611 return EINVAL;
2612
2613 /* Check for basic end-of-partition condition: if the start position of
2614 * the request is outside the partition, return success immediately.
2615 * The size of the request is obtained, and possibly reduced, later.
2616 */
2617 if (position >= dv->dv_size)
2618 return OK;
2619
2620 pos = dv->dv_base + position;
2621 eof = dv->dv_base + dv->dv_size;
2622
2623 return port_transfer(ps, pos, eof, endpt, (iovec_s_t *) iovec, count,
2624 do_write, flags);
2625 }
2626
2627 /*===========================================================================*
2628 * ahci_ioctl *
2629 *===========================================================================*/
2630 static int ahci_ioctl(devminor_t minor, unsigned long request,
2631 endpoint_t endpt, cp_grant_id_t grant, endpoint_t UNUSED(user_endpt))
2632 {
2633 /* Process I/O control requests.
2634 */
2635 struct port_state *ps;
2636 int r, val;
2637
2638 ps = ahci_get_port(minor);
2639
2640 switch (request) {
2641 case DIOCEJECT:
2642 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2643 return EIO;
2644
2645 if (!(ps->flags & FLAG_ATAPI))
2646 return EINVAL;
2647
2648 return atapi_load_eject(ps, 0, FALSE /*load*/);
2649
2650 case DIOCOPENCT:
2651 return sys_safecopyto(endpt, grant, 0,
2652 (vir_bytes) &ps->open_count, sizeof(ps->open_count));
2653
2654 case DIOCFLUSH:
2655 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2656 return EIO;
2657
2658 return gen_flush_wcache(ps);
2659
2660 case DIOCSETWC:
2661 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2662 return EIO;
2663
2664 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &val,
2665 sizeof(val))) != OK)
2666 return r;
2667
2668 return gen_set_wcache(ps, val);
2669
2670 case DIOCGETWC:
2671 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2672 return EIO;
2673
2674 if ((r = gen_get_wcache(ps, &val)) != OK)
2675 return r;
2676
2677 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &val,
2678 sizeof(val));
2679 }
2680
2681 return ENOTTY;
2682 }
2683
2684 /*===========================================================================*
2685 * ahci_device *
2686 *===========================================================================*/
2687 static int ahci_device(devminor_t minor, device_id_t *id)
2688 {
2689 /* Map a minor device number to a device ID.
2690 */
2691 struct port_state *ps;
2692 struct device *dv;
2693
2694 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2695 return ENXIO;
2696
2697 *id = ps->device;
2698
2699 return OK;
2700 }
2701
2702 /*===========================================================================*
2703 * ahci_get_port *
2704 *===========================================================================*/
2705 static struct port_state *ahci_get_port(devminor_t minor)
2706 {
2707 /* Get the port structure associated with the given minor device.
2708 * Called only from worker threads, so the minor device is already
2709 * guaranteed to map to a port.
2710 */
2711 struct port_state *ps;
2712 struct device *dv;
2713
2714 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2715 panic("device mapping for minor %d disappeared", minor);
2716
2717 return ps;
2718 }
2719
2720 /*===========================================================================*
2721 * main *
2722 *===========================================================================*/
2723 int main(int argc, char **argv)
2724 {
2725 /* Driver task.
2726 */
2727
2728 env_setargs(argc, argv);
2729 sef_local_startup();
2730
2731 blockdriver_mt_task(&ahci_dtab);
2732
2733 return 0;
2734 }
2735