Lines Matching "ignore-power-on-sel"
1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * bhyve PCIe-NVMe device emulation.
34 * -s <n>,nvme,devpath,maxq=#,qsz=#,ioslots=#,sectsz=#,ser=A-Z,eui64=#,dsm=<opt>
45 * ser = serial number (20-chars max)
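 *    e.g. (hypothetical values): -s 4,nvme,/path/to/nvme-disk.img,ser=NVME0001,dsm=auto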
52 - create async event for smart and log
53 - intr coalesce
120 /* Convert a zero-based value into a one-based value */
122 /* Convert a one-based value into a zero-based value */
123 #define ZERO_BASED(one) ((one) - 1)
127 (ZERO_BASED((sc)->num_squeues) & 0xffff) | \
128 (ZERO_BASED((sc)->num_cqueues) & 0xffff) << 16
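/* e.g. num_squeues = num_cqueues = 8 (one-based) encodes as 0x00070007 */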
199 * based on the advertised Maximum Data Transfer Size (MDTS) and given the number of
204 NVME_MAX_IOVEC - BLOCKIF_IOV_MAX : \
268 /** Asynchronous Event Information - Error */
279 /** Asynchronous Event Information - Notice */
484 sc->num_squeues = nsq;
486 sc->submit_queues = calloc(sc->num_squeues + 1,
488 if (sc->submit_queues == NULL) {
490 sc->num_squeues = 0;
492 struct nvme_submission_queue *sq = sc->submit_queues;
494 for (i = 0; i < sc->num_squeues + 1; i++)
507 sc->num_cqueues = ncq;
509 sc->compl_queues = calloc(sc->num_cqueues + 1,
511 if (sc->compl_queues == NULL) {
513 sc->num_cqueues = 0;
515 struct nvme_completion_queue *cq = sc->compl_queues;
517 for (i = 0; i < sc->num_cqueues + 1; i++)
525 struct nvme_controller_data *cd = &sc->ctrldata;
528 cd->vid = 0xFB5D;
529 cd->ssvid = 0x0000;
531 cpywithpad((char *)cd->mn, sizeof(cd->mn), "bhyve-NVMe", ' ');
532 cpywithpad((char *)cd->fr, sizeof(cd->fr), "1.0", ' ');
535 cd->rab = 4;
538 cd->ieee[0] = 0xfc;
539 cd->ieee[1] = 0x9c;
540 cd->ieee[2] = 0x58;
542 cd->mic = 0;
544 cd->mdts = NVME_MDTS; /* max data transfer size (2^mdts * CAP.MPSMIN) */
546 cd->ver = NVME_REV(1,4);
548 cd->cntrltype = NVME_CNTRLTYPE_IO;
549 cd->oacs = NVMEF(NVME_CTRLR_DATA_OACS_FORMAT, 1);
550 cd->oaes = NVMEM(NVME_CTRLR_DATA_OAES_NS_ATTR);
551 cd->acl = 2;
552 cd->aerl = 4;
554 /* Advertise one read-only firmware slot */
555 cd->frmw = NVMEM(NVME_CTRLR_DATA_FRMW_SLOT1_RO) |
557 cd->lpa = 0; /* TODO: support some simple things like SMART */
558 cd->elpe = 0; /* max error log page entries */
560 * Report a single power state (zero-based value)
563 cd->npss = 0;
566 cd->wctemp = 0x0157;
567 cd->cctemp = 0x0157;
570 cd->sanicap = NVMEF(NVME_CTRLR_DATA_SANICAP_NODMMAS,
573 cd->sqes = NVMEF(NVME_CTRLR_DATA_SQES_MAX, 6) |
575 cd->cqes = NVMEF(NVME_CTRLR_DATA_CQES_MAX, 4) |
577 cd->nn = 1; /* number of namespaces */
579 cd->oncs = 0;
580 switch (sc->dataset_management) {
582 if (sc->nvstore.deallocate)
583 cd->oncs |= NVME_ONCS_DSM;
586 cd->oncs |= NVME_ONCS_DSM;
592 cd->fna = NVMEM(NVME_CTRLR_DATA_FNA_FORMAT_ALL);
594 cd->vwc = NVMEF(NVME_CTRLR_DATA_VWC_ALL, NVME_CTRLR_DATA_VWC_ALL_NO);
596 ret = snprintf(cd->subnqn, sizeof(cd->subnqn),
597 "nqn.2013-12.org.freebsd:bhyve-%s-%u-%u-%u",
598 get_config_value("name"), sc->nsc_pi->pi_bus,
599 sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
600 if ((ret < 0) || ((unsigned)ret > sizeof(cd->subnqn)))
610 nd->nsze = nvstore->size / nvstore->sectsz;
611 nd->ncap = nd->nsze;
612 nd->nuse = nd->nsze;
623 if (nvstore->type == NVME_STOR_BLOCKIF)
624 nvstore->deallocate = blockif_candelete(nvstore->ctx);
626 nd->nlbaf = 0; /* NLBAF is a zero-based value (i.e. 1 LBA Format) */
627 nd->flbas = 0;
629 /* Create an EUI-64 if user did not provide one */
630 if (nvstore->eui64 == 0) {
632 uint64_t eui64 = nvstore->eui64;
635 sc->nsc_pi->pi_bus, sc->nsc_pi->pi_slot,
636 sc->nsc_pi->pi_func);
642 nvstore->eui64 = (eui64 << 16) | (nsid & 0xffff);
644 be64enc(nd->eui64, nvstore->eui64);
646 /* LBA data-sz = 2^lbads */
647 nd->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS, nvstore->sectsz_bits);
655 memset(&sc->err_log, 0, sizeof(sc->err_log));
656 memset(&sc->health_log, 0, sizeof(sc->health_log));
657 memset(&sc->fw_log, 0, sizeof(sc->fw_log));
658 memset(&sc->ns_log, 0, sizeof(sc->ns_log));
661 sc->read_dunits_remainder = 999;
662 sc->write_dunits_remainder = 999;
665 sc->health_log.temperature = NVME_TEMPERATURE;
666 sc->health_log.available_spare = 100;
667 sc->health_log.available_spare_threshold = 10;
670 sc->fw_log.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
671 memcpy(&sc->fw_log.revision[0], sc->ctrldata.fr,
672 sizeof(sc->fw_log.revision[0]));
674 memcpy(&sc->health_log.power_cycles, &power_cycles,
675 sizeof(sc->health_log.power_cycles));
690 //XXX hang - case NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG:
691 //XXX hang - case NVME_FEAT_HOST_BEHAVIOR_SUPPORT:
695 sc->feat[fid].set = nvme_feature_temperature;
698 sc->feat[fid].namespace_specific = true;
701 sc->feat[fid].set = nvme_feature_num_queues;
704 sc->feat[fid].set = nvme_feature_iv_config;
707 sc->feat[fid].set = nvme_feature_async_event;
709 sc->feat[fid].cdw11 = PCI_NVME_AEN_DEFAULT_MASK;
712 sc->feat[fid].set = nvme_feature_invalid_cb;
713 sc->feat[fid].get = nvme_feature_invalid_cb;
722 STAILQ_INIT(&sc->aer_list);
723 sc->aer_count = 0;
730 pthread_mutex_init(&sc->aer_mtx, NULL);
739 pthread_mutex_lock(&sc->aer_mtx);
740 while (!STAILQ_EMPTY(&sc->aer_list)) {
741 aer = STAILQ_FIRST(&sc->aer_list);
742 STAILQ_REMOVE_HEAD(&sc->aer_list, link);
745 pthread_mutex_unlock(&sc->aer_mtx);
754 return (sc->aer_count != 0);
760 struct nvme_controller_data *cd = &sc->ctrldata;
763 return (sc->aer_count == (cd->aerl + 1U));
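	/* e.g. with aerl = 4 (a zero-based value), at most 5 AER commands may be outstanding */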
781 return (-1);
784 aer->cid = cid;
786 pthread_mutex_lock(&sc->aer_mtx);
787 sc->aer_count++;
788 STAILQ_INSERT_TAIL(&sc->aer_list, aer, link);
789 pthread_mutex_unlock(&sc->aer_mtx);
805 pthread_mutex_lock(&sc->aer_mtx);
806 aer = STAILQ_FIRST(&sc->aer_list);
808 STAILQ_REMOVE_HEAD(&sc->aer_list, link);
809 sc->aer_count--;
811 pthread_mutex_unlock(&sc->aer_mtx);
821 memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen));
824 sc->aen[atype].atype = atype;
835 pthread_mutex_init(&sc->aen_mtx, NULL);
836 pthread_create(&sc->aen_tid, NULL, aen_thr, sc);
837 snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot,
838 sc->nsc_pi->pi_func);
839 pthread_set_name_np(sc->aen_tid, nstr);
854 pthread_cond_signal(&sc->aen_cond);
870 pthread_mutex_lock(&sc->aen_mtx);
871 aen = &sc->aen[atype];
874 if (aen->posted) {
875 pthread_mutex_unlock(&sc->aen_mtx);
879 aen->event_data = event_data;
880 aen->posted = true;
881 pthread_mutex_unlock(&sc->aen_mtx);
898 assert(pthread_mutex_isowned_np(&sc->aen_mtx));
900 aen = &sc->aen[atype];
907 if (!aen->posted) {
916 sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11;
918 DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data);
925 if ((mask & aen->event_data) == 0)
930 if (aen->event_data >= PCI_NVME_AEI_NOTICE_MAX) {
932 __func__, aen->event_data);
937 if ((PCI_NVME_AEI_NOTICE_MASK(aen->event_data) & mask) == 0)
939 switch (aen->event_data) {
976 DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype);
977 pci_nvme_cq_update(sc, &sc->compl_queues[0],
978 (lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */
979 aer->cid,
983 aen->event_data = 0;
984 aen->posted = false;
986 pci_generate_msix(sc->nsc_pi, 0);
997 pthread_mutex_lock(&sc->aen_mtx);
1000 pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx);
1002 pthread_mutex_unlock(&sc->aen_mtx);
1015 sc->regs.cap_lo = (ZERO_BASED(sc->max_qentries) & NVME_CAP_LO_REG_MQES_MASK) |
1019 sc->regs.cap_hi = NVMEF(NVME_CAP_HI_REG_CSS_NVM, 1);
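	/* CAP.MQES is a zero-based value: e.g. max_qentries = 2048 is advertised as 2047 */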
1021 sc->regs.vs = NVME_REV(1,4); /* NVMe v1.4 */
1023 sc->regs.cc = 0;
1025 assert(sc->submit_queues != NULL);
1027 for (i = 0; i < sc->num_squeues + 1; i++) {
1028 sc->submit_queues[i].qbase = NULL;
1029 sc->submit_queues[i].size = 0;
1030 sc->submit_queues[i].cqid = 0;
1031 sc->submit_queues[i].tail = 0;
1032 sc->submit_queues[i].head = 0;
1035 assert(sc->compl_queues != NULL);
1037 for (i = 0; i < sc->num_cqueues + 1; i++) {
1038 sc->compl_queues[i].qbase = NULL;
1039 sc->compl_queues[i].size = 0;
1040 sc->compl_queues[i].tail = 0;
1041 sc->compl_queues[i].head = 0;
1044 sc->num_q_is_set = false;
1053 sc->regs.csts = 0;
1059 pthread_mutex_lock(&sc->mtx);
1061 pthread_mutex_unlock(&sc->mtx);
1076 asqs = ONE_BASED(NVMEV(NVME_AQA_REG_ASQS, sc->regs.aqa));
1079 asqs - 1, sc->regs.aqa);
1080 sc->regs.csts |= NVME_CSTS_CFS;
1081 return (-1);
1083 sc->submit_queues[0].size = asqs;
1084 sc->submit_queues[0].qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx,
1085 sc->regs.asq, sizeof(struct nvme_command) * asqs);
1086 if (sc->submit_queues[0].qbase == NULL) {
1088 sc->regs.asq);
1089 sc->regs.csts |= NVME_CSTS_CFS;
1090 return (-1);
1093 DPRINTF("%s mapping Admin-SQ guest 0x%lx, host: %p",
1094 __func__, sc->regs.asq, sc->submit_queues[0].qbase);
1096 acqs = ONE_BASED(NVMEV(NVME_AQA_REG_ACQS, sc->regs.aqa));
1099 acqs - 1, sc->regs.aqa);
1100 sc->regs.csts |= NVME_CSTS_CFS;
1101 return (-1);
1103 sc->compl_queues[0].size = acqs;
1104 sc->compl_queues[0].qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx,
1105 sc->regs.acq, sizeof(struct nvme_completion) * acqs);
1106 if (sc->compl_queues[0].qbase == NULL) {
1108 sc->regs.acq);
1109 sc->regs.csts |= NVME_CSTS_CFS;
1110 return (-1);
1112 sc->compl_queues[0].intr_en = NVME_CQ_INTEN;
1114 DPRINTF("%s mapping Admin-CQ guest 0x%lx, host: %p",
1115 __func__, sc->regs.acq, sc->compl_queues[0].qbase);
1128 return (-1);
1132 bytes = PAGE_SIZE - (prp1 & PAGE_MASK);
1137 return (-1);
1147 len -= bytes;
1156 return (-1);
1180 struct nvme_submission_queue *sq = &sc->submit_queues[sqid];
1183 assert(cq->qbase != NULL);
1185 pthread_mutex_lock(&cq->mtx);
1187 cqe = &cq->qbase[cq->tail];
1190 status |= (cqe->status ^ NVME_STATUS_P) & NVME_STATUS_P_MASK;
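	/* the Phase Tag is inverted relative to the stale entry so the guest can recognize the new completion */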
1192 cqe->cdw0 = cdw0;
1193 cqe->sqhd = sq->head;
1194 cqe->sqid = sqid;
1195 cqe->cid = cid;
1196 cqe->status = status;
1198 cq->tail++;
1199 if (cq->tail >= cq->size) {
1200 cq->tail = 0;
1203 pthread_mutex_unlock(&cq->mtx);
1210 uint16_t qid = command->cdw10 & 0xffff;
1213 if (qid == 0 || qid > sc->num_squeues ||
1214 (sc->submit_queues[qid].qbase == NULL)) {
1216 __func__, qid, sc->num_squeues);
1217 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1222 sc->submit_queues[qid].qbase = NULL;
1223 sc->submit_queues[qid].cqid = 0;
1224 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1232 if (command->cdw11 & NVME_CMD_CDW11_PC) {
1233 uint16_t qid = command->cdw10 & 0xffff;
1236 if ((qid == 0) || (qid > sc->num_squeues) ||
1237 (sc->submit_queues[qid].qbase != NULL)) {
1239 __func__, qid, sc->num_squeues);
1240 pci_nvme_status_tc(&compl->status,
1246 nsq = &sc->submit_queues[qid];
1247 nsq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
1248 DPRINTF("%s size=%u (max=%u)", __func__, nsq->size, sc->max_qentries);
1249 if ((nsq->size < 2) || (nsq->size > sc->max_qentries)) {
1255 pci_nvme_status_tc(&compl->status,
1260 nsq->head = nsq->tail = 0;
1262 nsq->cqid = (command->cdw11 >> 16) & 0xffff;
1263 if ((nsq->cqid == 0) || (nsq->cqid > sc->num_cqueues)) {
1264 pci_nvme_status_tc(&compl->status,
1270 if (sc->compl_queues[nsq->cqid].qbase == NULL) {
1271 pci_nvme_status_tc(&compl->status,
1277 nsq->qpriority = (command->cdw11 >> 1) & 0x03;
1279 nsq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
1280 sizeof(struct nvme_command) * (size_t)nsq->size);
1283 qid, nsq->size, nsq->qbase, nsq->cqid);
1285 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1291 * Guest sent a non-contiguous submission queue request.
1294 WPRINTF("%s unsupported non-contig (list-based) "
1297 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1306 uint16_t qid = command->cdw10 & 0xffff;
1310 if (qid == 0 || qid > sc->num_cqueues ||
1311 (sc->compl_queues[qid].qbase == NULL)) {
1313 __func__, qid, sc->num_cqueues);
1314 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1320 for (sqid = 1; sqid < sc->num_squeues + 1; sqid++)
1321 if (sc->submit_queues[sqid].cqid == qid) {
1322 pci_nvme_status_tc(&compl->status,
1328 sc->compl_queues[qid].qbase = NULL;
1329 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1338 uint16_t qid = command->cdw10 & 0xffff;
1341 if ((command->cdw11 & NVME_CMD_CDW11_PC) == 0) {
1342 WPRINTF("%s unsupported non-contig (list-based) "
1346 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1350 if ((qid == 0) || (qid > sc->num_cqueues) ||
1351 (sc->compl_queues[qid].qbase != NULL)) {
1353 __func__, qid, sc->num_cqueues);
1354 pci_nvme_status_tc(&compl->status,
1360 ncq = &sc->compl_queues[qid];
1361 ncq->intr_en = (command->cdw11 & NVME_CMD_CDW11_IEN) >> 1;
1362 ncq->intr_vec = (command->cdw11 >> 16) & 0xffff;
1363 if (ncq->intr_vec > (sc->max_queues + 1)) {
1364 pci_nvme_status_tc(&compl->status,
1370 ncq->size = ONE_BASED((command->cdw10 >> 16) & 0xffff);
1371 if ((ncq->size < 2) || (ncq->size > sc->max_qentries)) {
1377 pci_nvme_status_tc(&compl->status,
1382 ncq->head = ncq->tail = 0;
1383 ncq->qbase = vm_map_gpa(sc->nsc_pi->pi_vmctx,
1384 command->prp1,
1385 sizeof(struct nvme_command) * (size_t)ncq->size);
1387 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1401 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1405 * and NUMDL. This is a zero-based value.
1407 logpage = command->cdw10 & 0xFF;
1408 logsize = ((command->cdw11 << 16) | (command->cdw10 >> 16)) + 1;
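	/* NUMD is a zero-based dword count: e.g. NUMDU=0, NUMDL=0x3ff requests 1024 dwords (4 KiB) */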
1410 logoff = ((uint64_t)(command->cdw13) << 32) | command->cdw12;
1416 if (logoff >= sizeof(sc->err_log)) {
1417 pci_nvme_status_genc(&compl->status,
1422 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1423 command->prp2, (uint8_t *)&sc->err_log + logoff,
1424 MIN(logsize, sizeof(sc->err_log) - logoff),
1428 if (logoff >= sizeof(sc->health_log)) {
1429 pci_nvme_status_genc(&compl->status,
1434 pthread_mutex_lock(&sc->mtx);
1435 memcpy(&sc->health_log.data_units_read, &sc->read_data_units,
1436 sizeof(sc->health_log.data_units_read));
1437 memcpy(&sc->health_log.data_units_written, &sc->write_data_units,
1438 sizeof(sc->health_log.data_units_written));
1439 memcpy(&sc->health_log.host_read_commands, &sc->read_commands,
1440 sizeof(sc->health_log.host_read_commands));
1441 memcpy(&sc->health_log.host_write_commands, &sc->write_commands,
1442 sizeof(sc->health_log.host_write_commands));
1443 pthread_mutex_unlock(&sc->mtx);
1445 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1446 command->prp2, (uint8_t *)&sc->health_log + logoff,
1447 MIN(logsize, sizeof(sc->health_log) - logoff),
1451 if (logoff >= sizeof(sc->fw_log)) {
1452 pci_nvme_status_genc(&compl->status,
1457 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1458 command->prp2, (uint8_t *)&sc->fw_log + logoff,
1459 MIN(logsize, sizeof(sc->fw_log) - logoff),
1463 if (logoff >= sizeof(sc->ns_log)) {
1464 pci_nvme_status_genc(&compl->status,
1469 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1470 command->prp2, (uint8_t *)&sc->ns_log + logoff,
1471 MIN(logsize, sizeof(sc->ns_log) - logoff),
1473 memset(&sc->ns_log, 0, sizeof(sc->ns_log));
1479 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1494 command->cdw10 & 0xFF, command->nsid);
1499 switch (command->cdw10 & 0xFF) {
1502 if (command->nsid == NVME_GLOBAL_NAMESPACE_TAG) {
1507 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1508 command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata),
1512 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
1513 command->prp2, (uint8_t *)&sc->ctrldata,
1514 sizeof(sc->ctrldata),
1518 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
1525 if (command->nsid != 1) {
1530 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
1538 memcpy(((uint8_t *)dest) + 4, sc->nsdata.eui64, sizeof(uint64_t));
1545 dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
1551 __func__, command->cdw10 & 0xFF);
1556 compl->status = status;
1570 name = "Power Management";
1600 name = "Autonomous Power State Transition";
1615 name = "Non-Operation Power State Config";
1667 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1677 uint32_t cdw11 = command->cdw11;
1681 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1686 if (iv > (sc->max_queues + 1)) {
1695 for (i = 0; i < sc->num_cqueues + 1; i++) {
1696 if (sc->compl_queues[i].intr_vec == iv) {
1697 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1709 if (command->cdw11 & NVME_ASYNC_EVENT_ENDURANCE_GROUP)
1710 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1727 tmpth = command->cdw11 & 0xffff;
1728 tmpsel = (command->cdw11 >> 16) & 0xf;
1729 thsel = (command->cdw11 >> 20) & 0x3;
1736 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1744 pthread_mutex_lock(&sc->mtx);
1746 sc->health_log.critical_warning |=
1749 sc->health_log.critical_warning &=
1751 pthread_mutex_unlock(&sc->mtx);
1753 report_crit = sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11 &
1758 sc->health_log.critical_warning);
1760 DPRINTF("%s: set_crit=%c critical_warning=%#x status=%#x", __func__, set_crit ? 'T':'F', sc->health_log.critical_warning, compl->status);
1771 if (sc->num_q_is_set) {
1773 pci_nvme_status_genc(&compl->status,
1778 nqr = command->cdw11 & 0xFFFF;
1781 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1785 sc->num_squeues = ONE_BASED(nqr);
1786 if (sc->num_squeues > sc->max_queues) {
1787 DPRINTF("NSQR=%u is greater than max %u", sc->num_squeues,
1788 sc->max_queues);
1789 sc->num_squeues = sc->max_queues;
1792 nqr = (command->cdw11 >> 16) & 0xFFFF;
1795 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1799 sc->num_cqueues = ONE_BASED(nqr);
1800 if (sc->num_cqueues > sc->max_queues) {
1801 DPRINTF("NCQR=%u is greater than max %u", sc->num_cqueues,
1802 sc->max_queues);
1803 sc->num_cqueues = sc->max_queues;
1806 /* Patch the command value which will be saved on callback's return */
1807 command->cdw11 = NVME_FEATURE_NUM_QUEUES(sc);
1808 compl->cdw0 = NVME_FEATURE_NUM_QUEUES(sc);
1810 sc->num_q_is_set = true;
1818 uint32_t nsid = command->nsid;
1819 uint8_t fid = NVMEV(NVME_FEAT_SET_FID, command->cdw10);
1820 bool sv = NVMEV(NVME_FEAT_SET_SV, command->cdw10);
1826 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1831 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1836 feat = &sc->feat[fid];
1838 if (feat->namespace_specific && (nsid == NVME_GLOBAL_NAMESPACE_TAG)) {
1839 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1843 if (!feat->namespace_specific &&
1845 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1850 compl->cdw0 = 0;
1851 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1853 if (feat->set)
1854 feat->set(sc, feat, command, compl);
1856 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1861 DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11);
1862 if (compl->status == NVME_SC_SUCCESS) {
1863 feat->cdw11 = command->cdw11;
1865 (command->cdw11 != 0))
1880 uint8_t fid = command->cdw10 & 0xFF;
1881 uint8_t sel = (command->cdw10 >> 8) & 0x7;
1887 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1891 compl->cdw0 = 0;
1892 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1894 feat = &sc->feat[fid];
1895 if (feat->get) {
1896 feat->get(sc, feat, command, compl);
1899 if (compl->status == NVME_SC_SUCCESS) {
1900 if ((sel == NVME_FEATURES_SEL_SUPPORTED) && feat->namespace_specific)
1901 compl->cdw0 = NVME_FEATURES_NS_SPECIFIC;
1903 compl->cdw0 = feat->cdw11;
1915 /* Only supports Secure Erase Setting - User Data Erase */
1916 ses = (command->cdw10 >> 9) & 0x7;
1918 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1923 lbaf = command->cdw10 & 0xf;
1925 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
1931 pi = (command->cdw10 >> 5) & 0x7;
1933 pci_nvme_status_genc(&compl->status, NVME_SC_INVALID_FIELD);
1937 if (sc->nvstore.type == NVME_STOR_RAM) {
1938 if (sc->nvstore.ctx)
1939 free(sc->nvstore.ctx);
1940 sc->nvstore.ctx = calloc(1, sc->nvstore.size);
1941 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1948 pci_nvme_status_genc(&compl->status,
1953 req->nvme_sq = &sc->submit_queues[0];
1954 req->sqid = 0;
1955 req->opc = command->opc;
1956 req->cid = command->cid;
1957 req->nsid = command->nsid;
1959 req->io_req.br_offset = 0;
1960 req->io_req.br_resid = sc->nvstore.size;
1961 req->io_req.br_callback = pci_nvme_io_done;
1963 err = blockif_delete(sc->nvstore.ctx, &req->io_req);
1965 pci_nvme_status_genc(&compl->status,
1969 compl->status = NVME_NO_STATUS;
1980 command->cdw10 & 0xFFFF, (command->cdw10 >> 16) & 0xFFFF);
1984 compl->cdw0 = 1;
1985 pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
1994 sc->aer_count, sc->ctrldata.aerl, command->cid);
1998 pci_nvme_status_tc(&compl->status, NVME_SCT_COMMAND_SPECIFIC,
2003 if (pci_nvme_aer_add(sc, command->cid)) {
2004 pci_nvme_status_tc(&compl->status, NVME_SCT_GENERIC,
2010 * Raise events when they happen based on the Set Features cmd.
2014 compl->status = NVME_NO_STATUS;
2031 sq = &sc->submit_queues[0];
2032 cq = &sc->compl_queues[0];
2034 pthread_mutex_lock(&sq->mtx);
2036 sqhead = sq->head;
2037 DPRINTF("sqhead %u, tail %u", sqhead, sq->tail);
2039 while (sqhead != atomic_load_acq_short(&sq->tail)) {
2040 cmd = &(sq->qbase)[sqhead];
2044 switch (cmd->opc) {
2094 sc->ctrldata.oacs) == 0) {
2105 cmd->opc);
2112 cmd->opc);
2115 sqhead = (sqhead + 1) % sq->size;
2118 pci_nvme_cq_update(sc, &sc->compl_queues[0],
2120 cmd->cid,
2127 sq->head = sqhead;
2129 if (cq->head != cq->tail)
2130 pci_generate_msix(sc->nsc_pi, 0);
2132 pthread_mutex_unlock(&sq->mtx);
2139 * E.g. 1 data unit is 1 - 1,000 512-byte blocks. 3 data units are 2,001 - 3,000
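 * (e.g. a 1,500-block transfer falls in the 1,001 - 2,000 range and counts as 2 data units)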
2147 pthread_mutex_lock(&sc->mtx);
2150 sc->write_commands++;
2153 sc->write_dunits_remainder += (bytes / 512);
2154 while (sc->write_dunits_remainder >= 1000) {
2155 sc->write_data_units++;
2156 sc->write_dunits_remainder -= 1000;
2160 sc->read_commands++;
2163 sc->read_dunits_remainder += (bytes / 512);
2164 while (sc->read_dunits_remainder >= 1000) {
2165 sc->read_data_units++;
2166 sc->read_dunits_remainder -= 1000;
2173 pthread_mutex_unlock(&sc->mtx);
2191 if (slba >> (64 - nvstore->sectsz_bits))
2194 offset = slba << nvstore->sectsz_bits;
2195 bytes = nblocks << nvstore->sectsz_bits;
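	/* reject requests that start at or past the end of the backing store, or that run past it */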
2198 if ((nvstore->size <= offset) || ((nvstore->size - offset) < bytes))
2212 return (-1);
2214 if (req->io_req.br_iovcnt == NVME_MAX_IOVEC) {
2215 return (-1);
2223 if (req->io_req.br_iovcnt == 0)
2226 range_is_contiguous = (req->prev_gpaddr + req->prev_size) == gpaddr;
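	/* physically adjacent guest ranges are merged into a single iovec to stay within NVME_MAX_IOVEC */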
2229 iovidx = req->io_req.br_iovcnt - 1;
2231 req->io_req.br_iov[iovidx].iov_base =
2232 paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
2233 req->prev_gpaddr, size);
2234 if (req->io_req.br_iov[iovidx].iov_base == NULL)
2235 return (-1);
2237 req->prev_size += size;
2238 req->io_req.br_resid += size;
2240 req->io_req.br_iov[iovidx].iov_len = req->prev_size;
2242 iovidx = req->io_req.br_iovcnt;
2244 req->io_req.br_offset = offset;
2245 req->io_req.br_resid = 0;
2246 req->io_req.br_param = req;
2249 req->io_req.br_iov[iovidx].iov_base =
2250 paddr_guest2host(req->sc->nsc_pi->pi_vmctx,
2252 if (req->io_req.br_iov[iovidx].iov_base == NULL)
2253 return (-1);
2255 req->io_req.br_iov[iovidx].iov_len = size;
2257 req->prev_gpaddr = gpaddr;
2258 req->prev_size = size;
2259 req->io_req.br_resid += size;
2261 req->io_req.br_iovcnt++;
2271 struct nvme_completion_queue *cq = &sc->compl_queues[sq->cqid];
2274 __func__, sqid, sq->cqid, cid, NVME_STATUS_GET_SCT(status),
2279 if (cq->head != cq->tail) {
2280 if (cq->intr_en & NVME_CQ_INTEN) {
2281 pci_generate_msix(sc->nsc_pi, cq->intr_vec);
2284 __func__, sq->cqid);
2292 req->sc = NULL;
2293 req->nvme_sq = NULL;
2294 req->sqid = 0;
2296 pthread_mutex_lock(&sc->mtx);
2298 STAILQ_INSERT_TAIL(&sc->ioreqs_free, req, link);
2299 sc->pending_ios--;
2302 if (sc->pending_ios == 0 &&
2303 NVME_CC_GET_EN(sc->regs.cc) && !(NVME_CSTS_GET_RDY(sc->regs.csts)))
2304 sc->regs.csts |= NVME_CSTS_RDY;
2306 pthread_mutex_unlock(&sc->mtx);
2308 sem_post(&sc->iosemlock);
2316 sem_wait(&sc->iosemlock);
2317 pthread_mutex_lock(&sc->mtx);
2319 req = STAILQ_FIRST(&sc->ioreqs_free);
2321 STAILQ_REMOVE_HEAD(&sc->ioreqs_free, link);
2323 req->sc = sc;
2325 sc->pending_ios++;
2327 pthread_mutex_unlock(&sc->mtx);
2329 req->io_req.br_iovcnt = 0;
2330 req->io_req.br_offset = 0;
2331 req->io_req.br_resid = 0;
2332 req->io_req.br_param = req;
2333 req->prev_gpaddr = 0;
2334 req->prev_size = 0;
2342 struct pci_nvme_ioreq *req = br->br_param;
2343 struct nvme_submission_queue *sq = req->nvme_sq;
2353 pci_nvme_set_completion(req->sc, sq, req->sqid, req->cid, status);
2354 pci_nvme_stats_write_read_update(req->sc, req->opc,
2355 req->bytes, status);
2356 pci_nvme_release_ioreq(req->sc, req);
2376 if (nvstore->type == NVME_STOR_RAM) {
2381 req->io_req.br_callback = pci_nvme_io_done;
2383 err = blockif_flush(nvstore->ctx, &req->io_req);
2406 uint8_t *buf = nvstore->ctx;
2416 if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, prp1, prp2,
2438 size = MIN(PAGE_SIZE - (prp1 % PAGE_SIZE), bytes);
2440 err = -1;
2445 bytes -= size;
2452 err = -1;
2456 void *vmctx = sc->nsc_pi->pi_vmctx;
2467 PAGE_SIZE - (prp % PAGE_SIZE));
2469 err = -1;
2472 last = prp_list + (NVME_PRP2_ITEMS - 1);
2479 err = -1;
2484 bytes -= size;
2489 req->io_req.br_callback = pci_nvme_io_done;
2491 err = blockif_write(nvstore->ctx, &req->io_req);
2493 err = blockif_read(nvstore->ctx, &req->io_req);
2510 bool is_write = cmd->opc == NVME_OPC_WRITE;
2513 lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
2514 nblocks = (cmd->cdw12 & 0xFFFF) + 1;
2515 bytes = nblocks << nvstore->sectsz_bits;
2529 offset = lba << nvstore->sectsz_bits;
2531 req->bytes = bytes;
2532 req->io_req.br_offset = lba;
2535 cmd->prp1 &= ~0x3UL;
2536 cmd->prp2 &= ~0x3UL;
2538 if (nvstore->type == NVME_STOR_RAM) {
2539 *status = nvme_write_read_ram(sc, nvstore, cmd->prp1,
2540 cmd->prp2, offset, bytes, is_write);
2543 cmd->prp1, cmd->prp2, offset, bytes, is_write);
2550 pci_nvme_stats_write_read_update(sc, cmd->opc, bytes, *status);
2558 struct pci_nvme_ioreq *req = br->br_param;
2559 struct pci_nvme_softc *sc = req->sc;
2566 } else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
2569 struct iovec *iov = req->io_req.br_iov;
2571 req->prev_gpaddr++;
2572 iov += req->prev_gpaddr;
2575 req->io_req.br_offset = (off_t)iov->iov_base;
2576 req->io_req.br_resid = iov->iov_len;
2577 if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
2585 pci_nvme_set_completion(sc, req->nvme_sq, req->sqid, req->cid,
2603 if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
2608 nr = cmd->cdw10 & 0xff;
2616 nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
2619 /* Check for invalid ranges and the number of non-zero lengths */
2631 if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
2633 int sectsz_bits = sc->nvstore.sectsz_bits;
2639 if (!nvstore->deallocate) {
2665 req->io_req.br_iovcnt = 0;
2666 req->io_req.br_offset = offset;
2667 req->io_req.br_resid = bytes;
2670 req->io_req.br_callback = pci_nvme_io_done;
2672 struct iovec *iov = req->io_req.br_iov;
2680 if ((nvstore->size - offset) < bytes) {
2689 req->io_req.br_callback = pci_nvme_dealloc_sm;
2695 req->prev_gpaddr = 0;
2696 req->prev_size = dr;
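	/* prev_gpaddr and prev_size are reused by the dealloc state machine as the current range index and range count */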
2699 err = blockif_delete(nvstore->ctx, &req->io_req);
2717 /* handle all submissions up to sq->tail index */
2718 sq = &sc->submit_queues[idx];
2720 pthread_mutex_lock(&sq->mtx);
2722 sqhead = sq->head;
2724 idx, sqhead, sq->tail, sq->qbase);
2726 while (sqhead != atomic_load_acq_short(&sq->tail)) {
2736 cmd = &sq->qbase[sqhead];
2737 sqhead = (sqhead + 1) % sq->size;
2739 nsid = le32toh(cmd->nsid);
2740 if ((nsid == 0) || (nsid > sc->ctrldata.nn)) {
2754 req->nvme_sq = sq;
2755 req->sqid = idx;
2756 req->opc = cmd->opc;
2757 req->cid = cmd->cid;
2758 req->nsid = cmd->nsid;
2760 switch (cmd->opc) {
2762 pending = nvme_opc_flush(sc, cmd, &sc->nvstore,
2767 pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
2773 __func__, lba, cmd->cdw12 & 0xFFFF); */
2777 pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
2782 __func__, cmd->opc);
2787 pci_nvme_set_completion(sc, sq, idx, cmd->cid, status);
2793 sq->head = sqhead;
2795 pthread_mutex_unlock(&sq->mtx);
2801 * "Asynchronous Event Information - Error Status" for details
2811 * Therefore, can never have more than (size - 1) entries
2813 if (sq->head == sq->tail)
2814 capacity = sq->size - 1;
2815 else if (sq->head > sq->tail)
2816 capacity = sq->size - (sq->head - sq->tail) - 1;
2818 capacity = sq->tail - sq->head - 1;
2820 if ((value == sq->tail) || /* same as previous */
2823 __func__, sq->size, sq->head, sq->tail, capacity, value);
2838 if (idx > sc->num_squeues) {
2841 __func__, idx, sc->num_squeues);
2847 if (sc->submit_queues[idx].qbase == NULL) {
2855 if (!pci_nvme_sq_doorbell_valid(&sc->submit_queues[idx], value)) {
2863 atomic_store_short(&sc->submit_queues[idx].tail,
2873 if (idx > sc->num_cqueues) {
2876 __func__, idx, sc->num_cqueues);
2882 if (sc->compl_queues[idx].qbase == NULL) {
2890 atomic_store_short(&sc->compl_queues[idx].head,
2941 DPRINTF("unknown nvme bar-0 offset 0x%lx", offset);
2953 uint64_t belloffset = offset - NVME_DOORBELL_OFFSET;
2957 if ((sc->regs.csts & NVME_CSTS_RDY) == 0) {
2963 if (belloffset > ((sc->max_queues+1) * 8 - 4)) {
2971 if (sc->submit_queues[idx].qbase == NULL)
2973 } else if (sc->compl_queues[idx].qbase == NULL)
2980 DPRINTF("nvme-write offset 0x%lx, size %d, value 0x%lx",
2993 pthread_mutex_lock(&sc->mtx);
3004 /* MSI-X, so ignore */
3007 /* MSI-X, so ignore */
3020 /* perform shutdown - flush out data to backend */
3021 sc->regs.csts &= ~NVMEM(NVME_CSTS_REG_SHST);
3022 sc->regs.csts |= NVMEF(NVME_CSTS_REG_SHST,
3025 if (NVME_CC_GET_EN(ccreg) != NVME_CC_GET_EN(sc->regs.cc)) {
3027 /* transition 1->0 causes controller reset */
3034 sc->regs.cc &= ~NVME_CC_WRITE_MASK;
3035 sc->regs.cc |= ccreg & NVME_CC_WRITE_MASK;
3038 sc->regs.cc &= ~NVME_CC_NEN_WRITE_MASK;
3039 sc->regs.cc |= ccreg & NVME_CC_NEN_WRITE_MASK;
3040 sc->regs.csts &= ~NVME_CSTS_RDY;
3041 } else if ((sc->pending_ios == 0) &&
3042 !(sc->regs.csts & NVME_CSTS_CFS)) {
3043 sc->regs.csts |= NVME_CSTS_RDY;
3049 /* ignore writes; don't support subsystem reset */
3052 sc->regs.aqa = (uint32_t)value;
3055 sc->regs.asq = (sc->regs.asq & (0xFFFFFFFF00000000)) |
3059 sc->regs.asq = (sc->regs.asq & (0x00000000FFFFFFFF)) |
3063 sc->regs.acq = (sc->regs.acq & (0xFFFFFFFF00000000)) |
3067 sc->regs.acq = (sc->regs.acq & (0x00000000FFFFFFFF)) |
3074 pthread_mutex_unlock(&sc->mtx);
3081 struct pci_nvme_softc* sc = pi->pi_arg;
3085 DPRINTF("nvme-write baridx %d, msix: off 0x%lx, size %d, "
3111 void *p = &(sc->regs);
3112 pthread_mutex_lock(&sc->mtx);
3114 pthread_mutex_unlock(&sc->mtx);
3132 DPRINTF(" nvme-read offset 0x%lx, size %d -> value 0x%x",
3143 struct pci_nvme_softc* sc = pi->pi_arg;
3147 DPRINTF("nvme-read bar: %d, msix: regoff 0x%lx, size %d",
3171 sc->max_queues = NVME_QUEUES;
3172 sc->max_qentries = NVME_MAX_QENTRIES;
3173 sc->ioslots = NVME_IOSLOTS;
3174 sc->num_squeues = sc->max_queues;
3175 sc->num_cqueues = sc->max_queues;
3176 sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
3178 snprintf(sc->ctrldata.sn, sizeof(sc->ctrldata.sn),
3179 "NVME-%d-%d", sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
3183 sc->max_queues = atoi(value);
3186 sc->max_qentries = atoi(value);
3187 if (sc->max_qentries <= 0) {
3189 sc->max_qentries);
3190 return (-1);
3195 sc->ioslots = atoi(value);
3196 if (sc->ioslots <= 0) {
3197 EPRINTLN("Invalid ioslots option %d", sc->ioslots);
3198 return (-1);
3208 * 7-bit ASCII, unused bytes should be space characters.
3211 cpywithpad((char *)sc->ctrldata.sn,
3212 sizeof(sc->ctrldata.sn), value, ' ');
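	/* e.g. a serial of "NVME0001" (8 chars) is stored followed by 12 space characters in the 20-byte sn field */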
3216 sc->nvstore.eui64 = htobe64(strtoull(value, NULL, 0));
3220 sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
3222 sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
3224 sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
3229 if (pci_emul_add_boot_device(sc->nsc_pi, atoi(value))) {
3231 return (-1);
3239 sc->nvstore.type = NVME_STOR_RAM;
3240 sc->nvstore.size = sz * 1024 * 1024;
3241 sc->nvstore.ctx = calloc(1, sc->nvstore.size);
3242 sc->nvstore.sectsz = 4096;
3243 sc->nvstore.sectsz_bits = 12;
3244 if (sc->nvstore.ctx == NULL) {
3246 return (-1);
3250 sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
3251 sc->nvstore.ctx = blockif_open(nvl, bident);
3252 if (sc->nvstore.ctx == NULL) {
3255 return (-1);
3257 sc->nvstore.type = NVME_STOR_BLOCKIF;
3258 sc->nvstore.size = blockif_size(sc->nvstore.ctx);
3262 sc->nvstore.sectsz = sectsz;
3263 else if (sc->nvstore.type != NVME_STOR_RAM)
3264 sc->nvstore.sectsz = blockif_sectsz(sc->nvstore.ctx);
3265 for (sc->nvstore.sectsz_bits = 9;
3266 (1U << sc->nvstore.sectsz_bits) < sc->nvstore.sectsz;
3267 sc->nvstore.sectsz_bits++);
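	/* e.g. sectsz = 4096 yields sectsz_bits = 12 (512 -> 9) */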
3269 if (sc->max_queues <= 0 || sc->max_queues > NVME_QUEUES)
3270 sc->max_queues = NVME_QUEUES;
3284 nvstore = &sc->nvstore;
3285 nd = &sc->nsdata;
3287 nvstore->size = new_size;
3291 sc->ns_log.ns[0] = 1;
3292 sc->ns_log.ns[1] = 0;
3308 pi->pi_arg = sc;
3309 sc->nsc_pi = pi;
3317 STAILQ_INIT(&sc->ioreqs_free);
3318 sc->ioreqs = calloc(sc->ioslots, sizeof(struct pci_nvme_ioreq));
3319 for (uint32_t i = 0; i < sc->ioslots; i++) {
3320 STAILQ_INSERT_TAIL(&sc->ioreqs_free, &sc->ioreqs[i], link);
3338 2 * sizeof(uint32_t) * (sc->max_queues + 1);
3349 error = pci_emul_add_msixcap(pi, sc->max_queues + 1, NVME_MSIX_BAR);
3361 pthread_mutex_init(&sc->mtx, NULL);
3362 sem_init(&sc->iosemlock, 0, sc->ioslots);
3363 blockif_register_resize_callback(sc->nvstore.ctx, pci_nvme_resized, sc);
3365 pci_nvme_init_queues(sc, sc->max_queues, sc->max_queues);
3367 * Controller data depends on Namespace data so initialize Namespace
3370 pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
3397 ram = strndup(opts + 4, cp - opts - 4);