xref: /spdk/lib/vmd/vmd.c (revision b30d57cdad6d2bc75cc1e4e2ebbcebcb0d98dcfa)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vmd.h"
35 
36 #include "spdk/stdinc.h"
37 #include "spdk/likely.h"
38 
39 static const char *device_type[] = {
40 	"PCI Express Endpoint",
41 	"Legacy PCI Express Endpoint",
42 	"Reserved 1",
43 	"Reserved 2",
44 	"Root Port of PCI Express Root Complex",
45 	"Upstream Port of PCI Express Switch",
46 	"Downstream Port of PCI Express Switch",
47 	"PCI Express to PCI/PCI-X Bridge",
48 	"PCI/PCI-X to PCI Express Bridge",
49 	"Root Complex Integrated Endpoint",
50 	"Root Complex Event Collector",
51 	"Reserved Capability"
52 };
53 
54 /*
55  * Container for all VMD adapters probed in the system.
56  */
57 struct vmd_container {
58 	uint32_t count;
59 	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
60 };
61 
62 static struct vmd_container g_vmd_container;
63 static uint8_t g_end_device_count;
64 
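/* Returns true if addr lies within the VMD's mapped config space (CFGBAR) window */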
65 static bool
66 vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
67 {
68 	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
69 	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
70 }
71 
72 static void
73 vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
74 {
75 	uint32_t pad;
76 
77 	/*
78 	 *  Device is not in the hot plug path; align the remaining base address from membar 1.
79 	 */
80 	if (vmd->physical_addr & (alignment - 1)) {
81 		pad = alignment - (vmd->physical_addr & (alignment - 1));
82 		vmd->physical_addr += pad;
83 		vmd->current_addr_size -= pad;
84 	}
85 }
86 
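/*
 * A root port carrying the VMD signatures in its prefetch base/limit upper registers
 * was already enumerated by a previous scan (see vmd_cache_scan_info()).
 */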
87 static bool
88 vmd_device_is_enumerated(const struct vmd_pci_device *vmd_device)
89 {
90 	return vmd_device->header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
91 	       vmd_device->header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
92 }
93 
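/* Matches Intel VMD root ports (vendor 0x8086, device IDs 0x2030 through 0x2033) */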
94 static bool
95 vmd_device_is_root_port(const struct vmd_pci_device *vmd_device)
96 {
97 	return vmd_device->header->common.vendor_id == 0x8086 &&
98 	       (vmd_device->header->common.device_id == 0x2030 ||
99 		vmd_device->header->common.device_id == 0x2031 ||
100 		vmd_device->header->common.device_id == 0x2032 ||
101 		vmd_device->header->common.device_id == 0x2033);
102 }
103 
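/*
 * Repeatedly merges physically contiguous regions on the free list into their
 * predecessor, returning the emptied descriptors to the unused queue, until no
 * adjacent pair remains.
 */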
104 static void
105 vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
106 {
107 	struct pci_mem_mgr *region, *prev;
108 
109 	do {
110 		prev = NULL;
111 		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
112 			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
113 				break;
114 			}
115 
116 			prev = region;
117 		}
118 
119 		if (region != NULL) {
120 			prev->size += region->size;
121 			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
122 			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
123 		}
124 	} while (region != NULL);
125 }
126 
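/* Returns a region to the free list, which is kept sorted by address, then coalesces */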
127 static void
128 vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
129 {
130 	struct pci_mem_mgr *current, *prev = NULL;
131 
132 	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);
133 
134 	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
135 		if (current->addr > region->addr) {
136 			break;
137 		}
138 
139 		prev = current;
140 	}
141 
142 	if (prev != NULL) {
143 		assert(prev->addr + prev->size <= region->addr);
144 		assert(current == NULL || (region->addr + region->size <= current->addr));
145 		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
146 	} else {
147 		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
148 	}
149 
150 	vmd_hotplug_coalesce_regions(hp);
151 }
152 
153 static void
154 vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
155 {
156 	struct pci_mem_mgr *region;
157 
158 	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
159 		if (region->addr == addr) {
160 			break;
161 		}
162 	}
163 
164 	assert(region != NULL);
165 	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);
166 
167 	vmd_hotplug_free_region(hp, region);
168 }
169 
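/*
 * First-fit allocation from the hot plug slot's memory window. If the chosen free
 * region is larger than requested, the remainder is split off into a spare descriptor
 * and returned to the free list; without a spare descriptor, the whole region is
 * handed out.
 */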
170 static uint64_t
171 vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
172 {
173 	struct pci_mem_mgr *region = NULL, *free_region;
174 
175 	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
176 		if (region->size >= size) {
177 			break;
178 		}
179 	}
180 
181 	if (region == NULL) {
182 		SPDK_DEBUGLOG(vmd, "Unable to find free hotplug memory region of size:"
183 			      "%"PRIx32"\n", size);
184 		return 0;
185 	}
186 
187 	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
188 	if (size < region->size) {
189 		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
190 		if (free_region == NULL) {
191 			SPDK_DEBUGLOG(vmd, "Unable to find unused descriptor to store the "
192 				      "free region of size: %"PRIu32"\n", region->size - size);
193 		} else {
194 			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
195 			free_region->size = region->size - size;
196 			free_region->addr = region->addr + size;
197 			region->size = size;
198 			vmd_hotplug_free_region(hp, free_region);
199 		}
200 	}
201 
202 	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);
203 
204 	return region->addr;
205 }
206 
207 /*
208  *  Allocates an address from vmd membar for the input memory size
209  *  vmd - vmd adapter object
210  *  dev - vmd_pci_device to allocate a base address for.
211  *  size - size of the memory window requested.
212  *  Size must be a power of 2. Addresses are returned aligned to the size boundary.
213  *  Returns a physical address within the VMD membar window, or 0x0 if the window cannot be allocated.
214  *  Consider increasing the size of the vmd membar if 0x0 is returned.
215  */
216 static uint64_t
217 vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
218 {
219 	uint64_t base_address = 0, padding = 0;
220 	struct vmd_pci_bus *hp_bus;
221 
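	/* size & (~size + 1) isolates the lowest set bit; it equals size only when size is a power of 2 */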
222 	if (size && ((size & (~size + 1)) != size)) {
223 		return base_address;
224 	}
225 
226 	/*
227 	 *  If the device is downstream of a hot plug port, allocate an address from the
228 	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to determine
229 	 *  if a free range exists that satisfies the input request. If a free range cannot be found,
230 	 *  get a buffer from the unused chunk. A first-fit algorithm is used.
231 	 */
232 	if (dev) {
233 		hp_bus = dev->parent;
234 		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
235 			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
236 		}
237 	}
238 
239 	/* Ensure physical membar allocated is size aligned */
240 	if (vmd->physical_addr & (size - 1)) {
241 		padding = size - (vmd->physical_addr & (size - 1));
242 	}
243 
244 	/* Allocate from membar if enough memory is left */
245 	if (vmd->current_addr_size >= size + padding) {
246 		base_address = vmd->physical_addr + padding;
247 		vmd->physical_addr += size + padding;
248 		vmd->current_addr_size -= size + padding;
249 	}
250 
251 	SPDK_DEBUGLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);
252 
253 	return base_address;
254 }
255 
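/* An end device is anything with a type 0 (non-bridge) config header */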
256 static bool
257 vmd_is_end_device(struct vmd_pci_device *dev)
258 {
259 	return (dev && dev->header) &&
260 	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
261 }
262 
263 static void
264 vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
265 {
266 	struct vmd_pci_bus *bus;
267 	struct vmd_pci_device *bridge;
268 
269 	if (base == 0 ||  limit == 0) {
270 		return;
271 	}
272 
273 	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
274 		bus = dev->bus_object;
275 	} else {
276 		bus = dev->parent;
277 	}
278 
279 	bridge = bus->self;
280 	SPDK_DEBUGLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
281 		      bridge->header->one.mem_limit);
282 
283 	if (dev->bus->vmd->scan_completed) {
284 		return;
285 	}
286 
287 	while (bus && bus->self != NULL) {
288 		bridge = bus->self;
289 
290 		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
291 		if (bridge->header->one.mem_base > base) {
292 			bridge->header->one.mem_base = base;
293 			base = bridge->header->one.mem_base;
294 		}
295 
296 		if (bridge->header->one.mem_limit < limit) {
297 			bridge->header->one.mem_limit = limit;
298 			limit = bridge->header->one.mem_limit;
299 		}
300 
301 		bus = bus->parent;
302 	}
303 }
304 
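/*
 * Used once scanning has completed: bridges return the BAR value that is already
 * programmed, while end devices get an address from the hot plug window or derive
 * one from the parent bridge's mem_base.
 */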
305 static uint64_t
306 vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
307 {
308 	struct vmd_pci_bus *bus = dev->parent;
309 
310 	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
311 		return dev->header->zero.BAR[index] & ~0xf;
312 	} else {
313 		if (bus->self->hotplug_capable) {
314 			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
315 		} else {
316 			return (uint64_t)bus->self->header->one.mem_base << 16;
317 		}
318 	}
319 }
320 
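/*
 * Sizes each BAR by writing all 1s and reading back the mask, assigns base addresses
 * from the membar (or hot plug window), enables memory decode and bus mastering, and
 * locates the MSI-X table. Returns true if at least one BAR was assigned.
 */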
321 static bool
322 vmd_assign_base_addrs(struct vmd_pci_device *dev)
323 {
324 	uint16_t mem_base = 0, mem_limit = 0;
325 	unsigned char mem_attr = 0;
326 	int last;
327 	struct vmd_adapter *vmd = NULL;
328 	bool ret_val = false;
329 	uint32_t bar_value;
330 	uint32_t table_offset;
331 
332 	if (dev && dev->bus) {
333 		vmd = dev->bus->vmd;
334 	}
335 
336 	if (!vmd) {
337 		return 0;
338 	}
339 
340 	vmd_align_base_addrs(vmd, ONE_MB);
341 
342 	last = dev->header_type ? 2 : 6;
343 	for (int i = 0; i < last; i++) {
344 		bar_value = dev->header->zero.BAR[i];
345 		dev->header->zero.BAR[i] = ~(0U);
346 		dev->bar[i].size = dev->header->zero.BAR[i];
347 		dev->header->zero.BAR[i] = bar_value;
348 
349 		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0  ||
350 		    dev->header->zero.BAR[i] & 1) {
351 			dev->bar[i].size = 0;
352 			continue;
353 		}
354 		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
355 		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);
356 
357 		if (vmd->scan_completed) {
358 			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
359 		} else {
360 			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
361 		}
362 
363 		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;
364 
365 		if (!dev->bar[i].start) {
366 			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
367 				i++;
368 			}
369 			continue;
370 		}
371 
372 		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
373 		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
374 			    BRIDGE_BASEREG(dev->bar[i].size - 1);
375 		if (!mem_base) {
376 			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
377 		}
378 
379 		ret_val = true;
380 
381 		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
382 			i++;
383 			if (i < last) {
384 				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
385 			}
386 		}
387 	}
388 
389 	/* Enable device MEM and bus mastering */
390 	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
391 	uint16_t cmd = dev->header->zero.command;
392 	(void)cmd; /* the read-back above flushes the posted write */
393 
394 	if (dev->msix_cap && ret_val) {
395 		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
396 		if (dev->bar[table_offset & 0x3].vaddr) {
397 			dev->msix_table = (volatile struct pci_msix_table_entry *)
398 					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
399 		}
400 	}
401 
402 	if (ret_val && vmd_is_end_device(dev)) {
403 		vmd_update_base_limit_register(dev, mem_base, mem_limit);
404 	}
405 
406 	return ret_val;
407 }
408 
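/* Walks the legacy capability list, caching the PCIe, MSI and MSI-X capability pointers */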
409 static void
410 vmd_get_device_capabilities(struct vmd_pci_device *dev)
412 {
413 	volatile uint8_t *config_space;
414 	uint8_t capabilities_offset;
415 	struct pci_capabilities_header *capabilities_hdr;
416 
417 	config_space = (volatile uint8_t *)dev->header;
418 	if ((dev->header->common.status  & PCI_CAPABILITIES_LIST) == 0) {
419 		return;
420 	}
421 
422 	capabilities_offset = dev->header->zero.cap_pointer;
423 	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
424 		capabilities_offset = dev->header->one.cap_pointer;
425 	}
426 
427 	while (capabilities_offset > 0) {
428 		capabilities_hdr = (struct pci_capabilities_header *)
429 				   &config_space[capabilities_offset];
430 		switch (capabilities_hdr->capability_id) {
431 		case CAPABILITY_ID_PCI_EXPRESS:
432 			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
433 			break;
434 
435 		case CAPABILITY_ID_MSI:
436 			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
437 			break;
438 
439 		case CAPABILITY_ID_MSIX:
440 			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
441 			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
442 			break;
443 
444 		default:
445 			break;
446 		}
447 		capabilities_offset = capabilities_hdr->next;
448 	}
449 }
450 
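/* Walks the PCIe extended capability list, which starts at EXTENDED_CAPABILITY_OFFSET */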
451 static volatile struct pci_enhanced_capability_header *
452 vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
453 {
454 	uint8_t *data;
455 	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
456 	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;
457 
458 	data = (uint8_t *)dev->header;
459 	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
460 		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
461 		if (cap_hdr->capability_id == capability_id) {
462 			return cap_hdr;
463 		}
464 		cap_offset = cap_hdr->next;
465 		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
466 			break;
467 		}
468 	}
469 
470 	return NULL;
471 }
472 
473 static void
474 vmd_read_config_space(struct vmd_pci_device *dev)
475 {
476 	/*
477 	 * Writes to the PCI config space are posted writes. To ensure a transaction reaches its
478 	 * destination before another write is posted, an immediate read of the written value should be performed.
479 	 */
480 	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
481 	{ uint16_t cmd = dev->header->common.command; (void)cmd; }
482 
483 	vmd_get_device_capabilities(dev);
484 	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
485 			DEVICE_SERIAL_NUMBER_CAP_ID);
486 }
487 
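/*
 * Checks the signatures cached in the root port to detect whether this VMD domain
 * was already enumerated (e.g. by a previous process) and records the result.
 */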
488 static void
489 vmd_update_scan_info(struct vmd_pci_device *dev)
490 {
491 	struct vmd_adapter *vmd_adapter = dev->bus->vmd;
492 
493 	if (vmd_adapter->root_port_updated) {
494 		return;
495 	}
496 
497 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
498 		return;
499 	}
500 
501 	if (vmd_device_is_root_port(dev)) {
502 		vmd_adapter->root_port_updated = 1;
503 		SPDK_DEBUGLOG(vmd, "root_port_updated = %d\n",
504 			      vmd_adapter->root_port_updated);
505 		SPDK_DEBUGLOG(vmd, "upper:limit = %x : %x\n",
506 			      dev->header->one.prefetch_base_upper,
507 			      dev->header->one.prefetch_limit_upper);
508 		if (vmd_device_is_enumerated(dev)) {
509 			vmd_adapter->scan_completed = 1;
510 			SPDK_DEBUGLOG(vmd, "scan_completed = %d\n",
511 				      vmd_adapter->scan_completed);
512 		}
513 	}
514 }
515 
516 static void
517 vmd_reset_base_limit_registers(struct vmd_pci_device *dev)
518 {
519 	uint32_t reg __attribute__((unused));
520 
521 	assert(dev->header_type != PCI_HEADER_TYPE_NORMAL);
522 	/*
523 	 * Writes to the pci config space are posted writes.
524 	 * To ensure transaction reaches its destination
525 	 * before another write is posted, an immediate read
526 	 * of the written value should be performed.
527 	 */
528 	dev->header->one.mem_base = 0xfff0;
529 	reg = dev->header->one.mem_base;
530 	dev->header->one.mem_limit = 0x0;
531 	reg = dev->header->one.mem_limit;
532 	dev->header->one.prefetch_base = 0x0;
533 	reg = dev->header->one.prefetch_base;
534 	dev->header->one.prefetch_limit = 0x0;
535 	reg = dev->header->one.prefetch_limit;
536 	dev->header->one.prefetch_base_upper = 0x0;
537 	reg = dev->header->one.prefetch_base_upper;
538 	dev->header->one.prefetch_limit_upper = 0x0;
539 	reg = dev->header->one.prefetch_limit_upper;
540 	dev->header->one.io_base_upper = 0x0;
541 	reg = dev->header->one.io_base_upper;
542 	dev->header->one.io_limit_upper = 0x0;
543 	reg = dev->header->one.io_limit_upper;
544 	dev->header->one.primary = 0;
545 	reg = dev->header->one.primary;
546 	dev->header->one.secondary = 0;
547 	reg = dev->header->one.secondary;
548 	dev->header->one.subordinate = 0;
549 	reg = dev->header->one.subordinate;
550 }
551 
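/*
 * Carves out a 1 MB window for the hot plug slot (allocated from the membar on a
 * first scan, or recovered from the bridge's mem_base on a rescan) and seeds the
 * free/unused region queues used by vmd_hotplug_allocate_base_addr().
 */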
552 static void
553 vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
554 {
555 	struct vmd_adapter *vmd = bus->vmd;
556 	struct vmd_hot_plug *hp = &dev->hp;
557 	size_t mem_id;
558 
559 	dev->hotplug_capable = true;
560 	hp->bar.size = 1 << 20;
561 
562 	if (!vmd->scan_completed) {
563 		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
564 		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
565 		bus->self->header->one.mem_limit =
566 			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
567 	} else {
568 		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
569 	}
570 
571 	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);
572 
573 	TAILQ_INIT(&hp->free_mem_queue);
574 	TAILQ_INIT(&hp->unused_mem_queue);
575 	TAILQ_INIT(&hp->alloc_mem_queue);
576 
577 	hp->mem[0].size = hp->bar.size;
578 	hp->mem[0].addr = hp->bar.start;
579 
580 	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);
581 
582 	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
583 		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
584 	}
585 
586 	SPDK_DEBUGLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
587 		      bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
588 }
589 
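/* Detects device presence by checking for a valid vendor ID at bus/devfn */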
590 static bool
591 vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
592 {
593 	volatile struct pci_header *header;
594 
595 	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
596 						CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
597 	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
598 		return false;
599 	}
600 
601 	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
602 		return false;
603 	}
604 
605 	return true;
606 }
607 
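/*
 * Allocates and initializes a vmd_pci_device for bus/devfn: maps its config header,
 * caches IDs and header type, resets bridge base/limit registers on a first scan,
 * and reads the capability lists.
 */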
608 static struct vmd_pci_device *
609 vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
610 {
611 	struct vmd_pci_device *dev = NULL;
612 	struct pci_header volatile *header;
613 	uint8_t header_type;
614 	uint32_t rev_class;
615 
616 	/* Make sure we're not creating two devices on the same dev/fn */
617 	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
618 		if (dev->devfn == devfn) {
619 			return NULL;
620 		}
621 	}
622 
623 	if (!vmd_bus_device_present(bus, devfn)) {
624 		return NULL;
625 	}
626 
627 	header = (struct pci_header * volatile)(bus->vmd->cfg_vaddr +
628 						CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
629 
630 	SPDK_DEBUGLOG(vmd, "PCI device found: %04x:%04x ***\n",
631 		      header->common.vendor_id, header->common.device_id);
632 
633 	dev = calloc(1, sizeof(*dev));
634 	if (!dev) {
635 		return NULL;
636 	}
637 
638 	dev->header = header;
639 	dev->vid = dev->header->common.vendor_id;
640 	dev->did = dev->header->common.device_id;
641 	dev->bus = bus;
642 	dev->parent = bus;
643 	dev->devfn = devfn;
644 	header_type = dev->header->common.header_type;
645 	rev_class = dev->header->common.rev_class;
646 	dev->class = rev_class >> 8;
647 	dev->header_type = header_type & 0x7;
648 
649 	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
650 		vmd_update_scan_info(dev);
651 		if (!dev->bus->vmd->scan_completed) {
652 			vmd_reset_base_limit_registers(dev);
653 		}
654 	}
655 
656 	vmd_read_config_space(dev);
657 
658 	return dev;
659 }
660 
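/* Creates the secondary bus behind a bridge and links it with its parent and self */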
661 static struct vmd_pci_bus *
662 vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
663 {
664 	struct vmd_pci_bus *new_bus;
665 
666 	new_bus = calloc(1, sizeof(*new_bus));
667 	if (!new_bus) {
668 		return NULL;
669 	}
670 
671 	new_bus->parent = parent;
672 	new_bus->domain = parent->domain;
673 	new_bus->bus_number = bus_number;
674 	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
675 	new_bus->self = bridge;
676 	new_bus->vmd = parent->vmd;
677 	TAILQ_INIT(&new_bus->dev_list);
678 
679 	bridge->subordinate = new_bus;
680 
681 	bridge->pci.addr.bus = new_bus->bus_number;
682 	bridge->pci.addr.dev = bridge->devfn;
683 	bridge->pci.addr.func = 0;
684 	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;
685 
686 	return new_bus;
687 }
688 
689 /*
690  * Assigns a bus number from the list of available
691  * bus numbers. If the device is downstream of a hot plug port,
692  * assign the bus number from those assigned to the HP port. Otherwise,
693  * assign the next bus number from the vmd bus number list.
694  */
695 static uint8_t
696 vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
697 {
698 	uint8_t bus = 0xff;
699 	struct vmd_pci_bus *hp_bus;
700 
701 	if (dev) {
702 		hp_bus = vmd_is_dev_in_hotplug_path(dev);
703 		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
704 			return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
705 		}
706 	}
707 
708 	/* Device is not under a hot plug path. Return next global bus number */
709 	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
710 		bus = vmd->next_bus_number;
711 		vmd->next_bus_number++;
712 	}
713 	return bus;
714 }
715 
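/* Reserves RESERVED_HOTPLUG_BUSES bus numbers from the adapter's pool for a hot plug port */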
716 static uint8_t
717 vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
718 {
719 	uint8_t bus_number = 0xff;
720 
721 	if (dev && dev->bus && dev->bus->vmd &&
722 	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
723 		bus_number = RESERVED_HOTPLUG_BUSES;
724 		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
725 	}
726 
727 	return bus_number;
728 }
729 
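/*
 * Enables MSI-X with the function mask (bit 14) held, sets the MSI-X enable bit
 * (bit 15), then clears the function mask. Each write is read back so the posted
 * write is flushed before the next one.
 */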
730 static void
731 vmd_enable_msix(struct vmd_pci_device *dev)
732 {
733 	volatile uint16_t control;
734 
735 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
736 	dev->msix_cap->message_control.as_uint16_t = control;
737 	control = dev->msix_cap->message_control.as_uint16_t;
738 	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
739 	control = dev->msix_cap->message_control.as_uint16_t;
740 	control = control & ~(1 << 14);
741 	dev->msix_cap->message_control.as_uint16_t = control;
742 	control = dev->msix_cap->message_control.as_uint16_t;
743 }
744 
745 static void
746 vmd_disable_msix(struct vmd_pci_device *dev)
747 {
748 	volatile uint16_t control;
749 
750 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
751 	dev->msix_cap->message_control.as_uint16_t = control;
752 	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
753 	dev->msix_cap->message_control.as_uint16_t = control;
754 	control = dev->msix_cap->message_control.as_uint16_t;
755 }
756 
757 /*
758  * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for the
759  * port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
760  */
761 static void
762 vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
763 {
764 	int entry;
765 
766 	if (!dev || !vmdEntry || !dev->msix_cap) {
767 		return;
768 	}
769 
770 	vmd_disable_msix(dev);
771 	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
772 		return;
773 	}
774 
775 	for (entry = 0; entry < dev->msix_table_size; ++entry) {
776 		dev->msix_table[entry].vector_control = 1;
777 	}
778 	vmd_enable_msix(dev);
779 }
780 
781 static void
782 vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
783 {
784 	/* Update the subordinate bus of all bridges above this bridge */
785 	volatile struct vmd_pci_device *dev = bridge;
786 	uint8_t subordinate_bus;
787 
788 	if (!dev) {
789 		return;
790 	}
791 	subordinate_bus = bridge->header->one.subordinate;
792 	while (dev->parent_bridge != NULL) {
793 		dev = dev->parent_bridge;
794 		if (dev->header->one.subordinate < subordinate_bus) {
795 			dev->header->one.subordinate = subordinate_bus;
796 			subordinate_bus = dev->header->one.subordinate;
797 		}
798 	}
799 }
800 
801 static bool
802 vmd_is_supported_device(struct vmd_pci_device *dev)
803 {
804 	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
805 }
806 
807 static int
808 vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
809 		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
810 {
811 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);
812 
813 	*size = dev->bar[bar].size;
814 	*phys_addr = dev->bar[bar].start;
815 	*mapped_addr = (void *)dev->bar[bar].vaddr;
816 
817 	return 0;
818 }
819 
820 static int
821 vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
822 {
823 	return 0;
824 }
825 
826 static int
827 vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
828 		 uint32_t offset)
829 {
830 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
831 	volatile uint8_t *src = (volatile uint8_t *)dev->header;
832 	uint8_t *dst = value;
833 	size_t i;
834 
835 	if (len + offset > PCI_MAX_CFG_SIZE) {
836 		return -1;
837 	}
838 
839 	for (i = 0; i < len; ++i) {
840 		dst[i] = src[offset + i];
841 	}
842 
843 	return 0;
844 }
845 
846 static int
847 vmd_dev_cfg_write(struct spdk_pci_device *_dev,  void *value,
848 		  uint32_t len, uint32_t offset)
849 {
850 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
851 	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
852 	uint8_t *src = value;
853 	size_t i;
854 
855 	if ((len + offset) > PCI_MAX_CFG_SIZE) {
856 		return -1;
857 	}
858 
859 	for (i = 0; i < len; ++i) {
860 		dst[offset + i] = src[i];
861 	}
862 
863 	return 0;
864 }
865 
866 static void
867 vmd_dev_detach(struct spdk_pci_device *dev)
868 {
869 	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
870 	struct vmd_pci_device *bus_device = vmd_device->bus->self;
871 	struct vmd_pci_bus *bus = vmd_device->bus;
872 	size_t i, num_bars = vmd_device->header_type ? 2 : 6;
873 
874 	spdk_pci_unhook_device(dev);
875 	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);
876 
877 	/* Release the hotplug region if the device is under hotplug-capable bus */
878 	if (bus_device && bus_device->hotplug_capable) {
879 		for (i = 0; i < num_bars; ++i) {
880 			if (vmd_device->bar[i].start != 0) {
881 				vmd_hotplug_free_addr(&bus_device->hp, vmd_device->bar[i].start);
882 			}
883 		}
884 	}
885 
886 	free(dev);
887 }
888 
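/* Fills in the spdk_pci_device fields and hooks supported (NVMe) devices into SPDK */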
889 static void
890 vmd_dev_init(struct vmd_pci_device *dev)
891 {
892 	uint8_t bdf[32];
893 
894 	dev->pci.addr.domain = dev->bus->vmd->domain;
895 	dev->pci.addr.bus = dev->bus->bus_number;
896 	dev->pci.addr.dev = dev->devfn;
897 	dev->pci.addr.func = 0;
898 	dev->pci.id.vendor_id = dev->header->common.vendor_id;
899 	dev->pci.id.device_id = dev->header->common.device_id;
900 	dev->pci.type = "vmd";
901 	dev->pci.map_bar = vmd_dev_map_bar;
902 	dev->pci.unmap_bar = vmd_dev_unmap_bar;
903 	dev->pci.cfg_read = vmd_dev_cfg_read;
904 	dev->pci.cfg_write = vmd_dev_cfg_write;
905 	dev->hotplug_capable = false;
906 	if (dev->pcie_cap != NULL) {
907 		dev->cached_slot_control = dev->pcie_cap->slot_control;
908 	}
909 
910 	if (vmd_is_supported_device(dev)) {
911 		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
912 		SPDK_DEBUGLOG(vmd, "Initializing NVMe device at %s\n", bdf);
913 		dev->pci.parent = dev->bus->vmd->pci;
914 		spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
915 	}
916 }
917 
918 /*
919  * Scans a single bus for all attached devices and returns a count of
920  * how many devices were found. In the VMD topology, it is assumed there are no
921  * multi-function devices. Hence a bus (bridge) will not have multi-function devices
922  * with both type 0 and type 1 headers.
923  *
924  * Another option for implementing this function is to make the bus an int and
925  * create a new device, PciBridge. PciBridge would inherit from PciDevice with extra
926  * fields: sub/pri/sec bus. The input would become PciPort, bus number and parent_bridge.
927  *
928  * The bus number is scanned and, if a device is found, either a PciBridge (type 1
929  * header) or a PciDevice (type 0 header) is created based on the header_type.
930  *
931  * For a PciBridge, bus numbers are assigned and the new bus is rescanned. The current
932  * PciBridge being scanned becomes the passed-in parent_bridge with the new bus number.
933  *
934  * The linked list becomes a list of PciBridges with PciDevices attached.
935  *
936  * Returns the count of devices found (type 1 + type 0 header devices).
937  */
938 static uint8_t
939 vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge)
940 {
941 	/* Assuming only single-function devices are on the bus */
942 	struct vmd_pci_device *new_dev;
943 	struct vmd_adapter *vmd;
944 	union express_slot_capabilities_register slot_cap;
945 	struct vmd_pci_bus *new_bus;
946 	uint8_t  device_number, dev_cnt = 0;
947 	uint8_t new_bus_num;
948 
949 	for (device_number = 0; device_number < 32; device_number++) {
950 		new_dev = vmd_alloc_dev(bus, device_number);
951 		if (new_dev == NULL) {
952 			continue;
953 		}
954 
955 		dev_cnt++;
956 		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
957 			slot_cap.as_uint32_t = 0;
958 			if (new_dev->pcie_cap != NULL) {
959 				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
960 			}
961 
962 			new_bus_num = vmd_get_next_bus_number(bus->vmd->is_hotplug_scan ? new_dev : NULL, bus->vmd);
963 			if (new_bus_num == 0xff) {
964 				free(new_dev);
965 				return dev_cnt;
966 			}
967 			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
968 			if (!new_bus) {
969 				free(new_dev);
970 				return dev_cnt;
971 			}
972 			new_bus->primary_bus = bus->secondary_bus;
973 			new_bus->self = new_dev;
974 			new_dev->bus_object = new_bus;
975 
976 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
977 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
978 				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
979 				new_bus->subordinate_bus += new_bus->hotplug_buses;
980 
981 				/* Attach hot plug instance if HP is supported */
982 				/* Hot-inserted SSDs can be assigned a port bus of subordinate + 1 */
983 				SPDK_DEBUGLOG(vmd, "hotplug_capable/slot_implemented = "
984 					      "%x:%x\n", slot_cap.bit_field.hotplug_capable,
985 					      new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
986 			}
987 
988 			new_dev->parent_bridge = parent_bridge;
989 			new_dev->header->one.primary = new_bus->primary_bus;
990 			new_dev->header->one.secondary = new_bus->secondary_bus;
991 			new_dev->header->one.subordinate = new_bus->subordinate_bus;
992 
993 			vmd_bus_update_bridge_info(new_dev);
994 			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);
995 
996 			vmd_dev_init(new_dev);
997 
998 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
999 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1000 				vmd_init_hotplug(new_dev, new_bus);
1001 			}
1002 
1003 			dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
1004 			if (new_dev->pcie_cap != NULL) {
1005 				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
1006 					return dev_cnt;
1007 				}
1008 			}
1009 		} else {
1010 			/* Attach the device to the current bus and assign base addresses */
1011 			TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
1012 			g_end_device_count++;
1013 			if (vmd_assign_base_addrs(new_dev)) {
1014 				vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
1015 				vmd_dev_init(new_dev);
1016 				if (vmd_is_supported_device(new_dev)) {
1017 					vmd = bus->vmd;
1018 					vmd->target[vmd->nvme_count] = new_dev;
1019 					vmd->nvme_count++;
1020 				}
1021 			} else {
1022 				SPDK_DEBUGLOG(vmd, "Removing failed device:%p\n", new_dev);
1023 				TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
1024 				free(new_dev);
1025 				if (dev_cnt) {
1026 					dev_cnt--;
1027 				}
1028 			}
1029 		}
1030 	}
1031 
1032 	return dev_cnt;
1033 }
1034 
1035 static void
1036 vmd_print_pci_info(struct vmd_pci_device *dev)
1037 {
1038 	if (!dev) {
1039 		return;
1040 	}
1041 
1042 	if (dev->pcie_cap != NULL) {
1043 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
1044 			     dev->header->common.vendor_id, dev->header->common.device_id,
1045 			     dev->pcie_cap->express_cap_register.bit_field.device_type,
1046 			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
1047 	} else {
1048 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
1049 			     dev->header->common.vendor_id, dev->header->common.device_id);
1050 	}
1051 
1052 	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
1053 		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);
1054 
1055 	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
1056 		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
1057 			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
1058 	}
1059 
1060 	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
1061 		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
1062 			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
1063 		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1064 			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
1065 			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
1066 				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
1067 			}
1068 		}
1069 	}
1070 
1071 	if (dev->sn_cap != NULL) {
1072 		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
1073 		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;
1074 
1075 		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
1076 			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
1077 	}
1078 }
1079 
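/*
 * Writes the VMD signatures into a root port's prefetch upper registers so a later
 * scan can detect that enumeration was already performed.
 */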
1080 static void
1081 vmd_cache_scan_info(struct vmd_pci_device *dev)
1082 {
1083 	uint32_t reg __attribute__((unused));
1084 
1085 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
1086 		return;
1087 	}
1088 
1089 	SPDK_DEBUGLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
1090 		      dev->header->common.device_id);
1091 
1092 	if (vmd_device_is_root_port(dev)) {
1093 		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
1094 		reg = dev->header->one.prefetch_base_upper;
1095 		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
1096 		reg = dev->header->one.prefetch_limit_upper;
1097 
1098 		SPDK_DEBUGLOG(vmd, "prefetch: %x:%x\n",
1099 			      dev->header->one.prefetch_base_upper,
1100 			      dev->header->one.prefetch_limit_upper);
1101 	}
1102 }
1103 
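/*
 * Enumeration entry point: adds the VMD root bus to the bus list, scans it
 * recursively, then logs the resulting topology and caches the scan signatures.
 */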
1104 static uint8_t
1105 vmd_scan_pcibus(struct vmd_pci_bus *bus)
1106 {
1107 	struct vmd_pci_bus *bus_entry;
1108 	struct vmd_pci_device *dev;
1109 	uint8_t dev_cnt;
1110 
1111 	g_end_device_count = 0;
1112 	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
1113 	bus->vmd->next_bus_number = bus->bus_number + 1;
1114 	dev_cnt = vmd_scan_single_bus(bus, NULL);
1115 
1116 	SPDK_DEBUGLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
1117 	SPDK_DEBUGLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);
1118 
1119 	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
1120 		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
1121 		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);
1122 
1123 	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
1124 		if (bus_entry->self != NULL) {
1125 			vmd_print_pci_info(bus_entry->self);
1126 			vmd_cache_scan_info(bus_entry->self);
1127 		}
1128 
1129 		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
1130 			vmd_print_pci_info(dev);
1131 		}
1132 	}
1133 
1134 	return dev_cnt;
1135 }
1136 
1137 static int
1138 vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
1139 {
1140 	int rc;
1141 
1142 	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
1143 				     &vmd->cfgbar, &vmd->cfgbar_size);
1144 	if (rc == 0) {
1145 		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
1146 					     &vmd->membar, &vmd->membar_size);
1147 	}
1148 
1149 	if (rc == 0) {
1150 		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
1151 					     &vmd->msixbar, &vmd->msixbar_size);
1152 	}
1153 
1154 	if (rc == 0) {
1155 		vmd->physical_addr = vmd->membar;
1156 		vmd->current_addr_size = vmd->membar_size;
1157 	}
1158 	return rc;
1159 }
1160 
1161 static int
1162 vmd_enumerate_devices(struct vmd_adapter *vmd)
1163 {
1164 	vmd->vmd_bus.vmd = vmd;
1165 	vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
1166 	vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
1167 	vmd->vmd_bus.domain = vmd->pci->addr.domain;
1168 
1169 	return vmd_scan_pcibus(&vmd->vmd_bus);
1170 }
1171 
1172 struct vmd_pci_device *
1173 vmd_find_device(const struct spdk_pci_addr *addr)
1174 {
1175 	struct vmd_pci_bus *bus;
1176 	struct vmd_pci_device *dev;
1177 	uint32_t i;
1178 
1179 	for (i = 0; i < g_vmd_container.count; ++i) {
1180 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1181 			if (bus->self) {
1182 				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
1183 					return bus->self;
1184 				}
1185 			}
1186 
1187 			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1188 				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
1189 					return dev;
1190 				}
1191 			}
1192 		}
1193 	}
1194 
1195 	return NULL;
1196 }
1197 
1198 static int
1199 vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
1200 {
1201 	uint32_t cmd_reg = 0;
1202 	char bdf[32] = {0};
1203 	struct vmd_container *vmd_c = ctx;
1204 	size_t i;
1205 
	/* Avoid overrunning the fixed-size adapter array */
	if (vmd_c->count == MAX_VMD_SUPPORTED) {
		return -1;
	}

1206 	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
1207 	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
1208 	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);
1209 
1210 	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
1211 	SPDK_DEBUGLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);
1212 
1213 	/* map vmd bars */
1214 	i = vmd_c->count;
1215 	vmd_c->vmd[i].pci = pci_dev;
1216 	vmd_c->vmd[i].vmd_index = i;
1217 	vmd_c->vmd[i].domain =
1218 		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
1219 	vmd_c->vmd[i].max_pci_bus = PCI_MAX_BUS_NUMBER;
1220 	TAILQ_INIT(&vmd_c->vmd[i].bus_list);
1221 
1222 	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
1223 		return -1;
1224 	}
1225 
1226 	SPDK_DEBUGLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
1227 		      (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
1228 		      (uint32_t)vmd_c->vmd[i].cfgbar_size);
1229 	SPDK_DEBUGLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
1230 		      (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
1231 		      (uint32_t)vmd_c->vmd[i].membar_size);
1232 	SPDK_DEBUGLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
1233 		      (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
1234 		      (uint32_t)vmd_c->vmd[i].msixbar_size);
1235 
1236 	vmd_c->count = i + 1;
1237 
1238 	vmd_enumerate_devices(&vmd_c->vmd[i]);
1239 
1240 	return 0;
1241 }
1242 
1243 int
1244 spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
1245 {
1246 	int cnt = 0;
1247 	struct vmd_pci_bus *bus;
1248 	struct vmd_pci_device *dev;
1249 
1250 	if (!nvme_list) {
1251 		return -1;
1252 	}
1253 
1254 	for (uint32_t i = 0; i < g_vmd_container.count; ++i) {
1255 		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
1256 			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1257 				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1258 					nvme_list[cnt++] = dev->pci;
1259 					if (!dev->is_hooked) {
1260 						vmd_dev_init(dev);
1261 						dev->is_hooked = 1;
1262 					}
1263 				}
1264 			}
1265 		}
1266 	}
1267 
1268 	return cnt;
1269 }
1270 
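/*
 * Slot and link status bits are write-1-to-clear, so writing back the value just
 * read clears any pending events; the trailing reads flush the posted writes.
 */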
1271 static void
1272 vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
1273 {
1274 	struct vmd_pci_device *device = bus->self;
1275 	uint16_t status __attribute__((unused));
1276 
1277 	status = device->pcie_cap->slot_status.as_uint16_t;
1278 	device->pcie_cap->slot_status.as_uint16_t = status;
1279 	status = device->pcie_cap->slot_status.as_uint16_t;
1280 
1281 	status = device->pcie_cap->link_status.as_uint16_t;
1282 	device->pcie_cap->link_status.as_uint16_t = status;
1283 	status = device->pcie_cap->link_status.as_uint16_t;
1284 }
1285 
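/* Polls for up to 4 seconds (20 x 200 ms) waiting for the hot-inserted device to appear */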
1286 static void
1287 vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
1288 {
1289 	uint8_t num_devices, sleep_count;
1290 
1291 	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
1292 		/* Scan until a new device is found */
1293 		num_devices = vmd_scan_single_bus(bus, bus->self);
1294 		if (num_devices > 0) {
1295 			break;
1296 		}
1297 
1298 		spdk_delay_us(200000);
1299 	}
1300 
1301 	if (num_devices == 0) {
1302 		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
1303 	}
1304 }
1305 
1306 static void
1307 vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
1308 {
1309 	struct vmd_pci_device *device, *tmpdev;
1310 
1311 	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
1312 		if (!vmd_bus_device_present(bus, device->devfn)) {
1313 			device->pci.internal.pending_removal = true;
1314 
1315 			/* If the device isn't attached, remove it immediately */
1316 			if (!device->pci.internal.attached) {
1317 				vmd_dev_detach(&device->pci);
1318 			}
1319 		}
1320 	}
1321 }
1322 
1323 int
1324 spdk_vmd_hotplug_monitor(void)
1325 {
1326 	struct vmd_pci_bus *bus;
1327 	struct vmd_pci_device *device;
1328 	int num_hotplugs = 0;
1329 	uint32_t i;
1330 
1331 	for (i = 0; i < g_vmd_container.count; ++i) {
1332 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1333 			device = bus->self;
1334 			if (device == NULL || !device->hotplug_capable) {
1335 				continue;
1336 			}
1337 
1338 			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
1339 				continue;
1340 			}
1341 
1342 			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
1343 				SPDK_DEBUGLOG(vmd, "Device hotplug detected on bus "
1344 					      "%"PRIu32"\n", bus->bus_number);
1345 				vmd_bus_handle_hotplug(bus);
1346 			} else {
1347 				SPDK_DEBUGLOG(vmd, "Device hotremove detected on bus "
1348 					      "%"PRIu32"\n", bus->bus_number);
1349 				vmd_bus_handle_hotremove(bus);
1350 			}
1351 
1352 			vmd_clear_hotplug_status(bus);
1353 			num_hotplugs++;
1354 		}
1355 	}
1356 
1357 	return num_hotplugs;
1358 }
1359 
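/*
 * Typical usage, as a minimal sketch (it assumes the SPDK environment has already
 * been initialized by the caller, e.g. via spdk_env_init()):
 *
 *	if (spdk_vmd_init() == 0) {
 *		// NVMe devices behind each VMD are now hooked into the SPDK PCI
 *		// subsystem and can be attached through the usual NVMe probe flow.
 *		// Call spdk_vmd_hotplug_monitor() periodically to service hot plug.
 *		spdk_vmd_fini();
 *	}
 */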
1360 int
1361 spdk_vmd_init(void)
1362 {
1363 	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
1364 }
1365 
1366 void
1367 spdk_vmd_fini(void)
1368 {
1369 	uint32_t i;
1370 
1371 	for (i = 0; i < g_vmd_container.count; ++i) {
1372 		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
1373 	}
1374 }
1375 
1376 SPDK_LOG_REGISTER_COMPONENT(vmd)
1377