1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vmd.h"
35 
36 #include "spdk/stdinc.h"
37 #include "spdk/likely.h"
38 
static const char *device_type[] = {
	"PCI Express Endpoint",
	"Legacy PCI Express Endpoint",
	"Reserved 1",
	"Reserved 2",
	"Root Port of PCI Express Root Complex",
	"Upstream Port of PCI Express Switch",
	"Downstream Port of PCI Express Switch",
	"PCI Express to PCI/PCI-X Bridge",
	"PCI/PCI-X to PCI Express Bridge",
	"Root Complex Integrated Endpoint",
	"Root Complex Event Collector",
	"Reserved Capability"
};
53 
/*
 * Container for all VMD adapters probed in the system.
 */
57 struct vmd_container {
58 	uint32_t count;
59 	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
60 };
61 
62 static struct vmd_container g_vmd_container;
63 static uint8_t g_end_device_count;
64 
65 static bool
66 vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
67 {
68 	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
69 	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
70 }
71 
72 static void
73 vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
74 {
75 	uint32_t pad;
76 
	/*
	 * The device is not in a hot plug path; align the base address of the
	 * space remaining in membar 1.
	 */
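	/*
	 * Example: with alignment = 1 MiB and physical_addr = 0x80180000,
	 * pad = 0x100000 - 0x80000 = 0x80000, so the next allocation starts
	 * at the aligned address 0x80200000 and current_addr_size shrinks by
	 * the same 0x80000 bytes.
	 */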
80 	if (vmd->physical_addr & (alignment - 1)) {
81 		pad = alignment - (vmd->physical_addr & (alignment - 1));
82 		vmd->physical_addr += pad;
83 		vmd->current_addr_size -= pad;
84 	}
85 }
86 
87 static bool
88 vmd_device_is_enumerated(const struct vmd_pci_device *vmd_device)
89 {
90 	return vmd_device->header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
91 	       vmd_device->header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
92 }
93 
94 static bool
95 vmd_device_is_root_port(const struct vmd_pci_device *vmd_device)
96 {
97 	return vmd_device->header->common.vendor_id == SPDK_PCI_VID_INTEL &&
98 	       (vmd_device->header->common.device_id == PCI_ROOT_PORT_A_INTEL_SKX ||
99 		vmd_device->header->common.device_id == PCI_ROOT_PORT_B_INTEL_SKX ||
100 		vmd_device->header->common.device_id == PCI_ROOT_PORT_C_INTEL_SKX ||
101 		vmd_device->header->common.device_id == PCI_ROOT_PORT_D_INTEL_SKX ||
102 		vmd_device->header->common.device_id == PCI_ROOT_PORT_A_INTEL_ICX ||
103 		vmd_device->header->common.device_id == PCI_ROOT_PORT_B_INTEL_ICX ||
104 		vmd_device->header->common.device_id == PCI_ROOT_PORT_C_INTEL_ICX ||
105 		vmd_device->header->common.device_id == PCI_ROOT_PORT_D_INTEL_ICX);
106 }
107 
108 static void
109 vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
110 {
111 	struct pci_mem_mgr *region, *prev;
112 
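	/*
	 * Each pass scans the address-ordered free queue for the first pair of
	 * adjacent regions (e.g. [0x1000, 0x1fff] immediately followed by
	 * [0x2000, 0x2fff]), folds the second into the first, and returns the
	 * emptied descriptor to the unused queue. Passes repeat until no
	 * adjacent pair remains.
	 */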
113 	do {
114 		prev = NULL;
115 		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
116 			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
117 				break;
118 			}
119 
120 			prev = region;
121 		}
122 
123 		if (region != NULL) {
124 			prev->size += region->size;
125 			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
126 			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
127 		}
128 	} while (region != NULL);
129 }
130 
131 static void
132 vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
133 {
134 	struct pci_mem_mgr *current, *prev = NULL;
135 
136 	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);
137 
138 	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
139 		if (current->addr > region->addr) {
140 			break;
141 		}
142 
143 		prev = current;
144 	}
145 
146 	if (prev != NULL) {
147 		assert(prev->addr + prev->size <= region->addr);
148 		assert(current == NULL || (region->addr + region->size <= current->addr));
149 		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
150 	} else {
151 		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
152 	}
153 
154 	vmd_hotplug_coalesce_regions(hp);
155 }
156 
157 static void
158 vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
159 {
160 	struct pci_mem_mgr *region;
161 
162 	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
163 		if (region->addr == addr) {
164 			break;
165 		}
166 	}
167 
168 	assert(region != NULL);
169 	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);
170 
171 	vmd_hotplug_free_region(hp, region);
172 }
173 
174 static uint64_t
175 vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
176 {
177 	struct pci_mem_mgr *region = NULL, *free_region;
178 
179 	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
180 		if (region->size >= size) {
181 			break;
182 		}
183 	}
184 
185 	if (region == NULL) {
186 		SPDK_DEBUGLOG(vmd, "Unable to find free hotplug memory region of size:"
187 			      "%"PRIx32"\n", size);
188 		return 0;
189 	}
190 
191 	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
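	/*
	 * First fit: if the chosen region is larger than the request, split it.
	 * For example, a 1 MiB free region serving a 16 KiB request is trimmed
	 * to 16 KiB and the remaining 0xfc000 bytes go back on the free queue,
	 * provided a spare descriptor is available on the unused queue.
	 */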
192 	if (size < region->size) {
193 		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
194 		if (free_region == NULL) {
195 			SPDK_DEBUGLOG(vmd, "Unable to find unused descriptor to store the "
196 				      "free region of size: %"PRIu32"\n", region->size - size);
197 		} else {
198 			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
199 			free_region->size = region->size - size;
200 			free_region->addr = region->addr + size;
201 			region->size = size;
202 			vmd_hotplug_free_region(hp, free_region);
203 		}
204 	}
205 
206 	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);
207 
208 	return region->addr;
209 }
210 
/*
 *  Allocates an address from the vmd membar for the input memory size
 *  vmd - vmd adapter object
 *  dev - vmd_pci_device to allocate a base address for.
 *  size - size of the memory window requested.
 *  Size must be a power of 2. Addresses are returned on the size boundary.
 *  Returns the physical address within the VMD membar window, or 0x0 if the
 *  window cannot be allocated. Consider increasing the size of the vmd membar
 *  if 0x0 is returned.
 */
220 static uint64_t
221 vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
222 {
223 	uint64_t base_address = 0, padding = 0;
224 	struct vmd_pci_bus *hp_bus;
225 
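	/*
	 * size & (~size + 1) isolates the lowest set bit of size; it equals
	 * size only when size is a power of two, so requests that cannot be
	 * naturally aligned are rejected up front.
	 */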
226 	if (size && ((size & (~size + 1)) != size)) {
227 		return base_address;
228 	}
229 
	/*
	 *  If the device is downstream of a hot plug port, allocate the address
	 *  from the range dedicated to the hot plug slot. Search the list of
	 *  allocated addresses to determine whether a free range satisfies the
	 *  request; if one cannot be found, take a buffer from the unused chunk.
	 *  A first-fit algorithm is used.
	 */
236 	if (dev) {
237 		hp_bus = dev->parent;
238 		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
239 			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
240 		}
241 	}
242 
	/* Ensure the physical address allocated from the membar is size-aligned */
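	/*
	 * Example: a 16 KiB request with physical_addr = 0x80002000 needs
	 * padding = 0x4000 - 0x2000 = 0x2000, so the window is placed at
	 * 0x80004000 and 0x6000 bytes total are consumed.
	 */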
244 	if (vmd->physical_addr & (size - 1)) {
245 		padding = size - (vmd->physical_addr & (size - 1));
246 	}
247 
248 	/* Allocate from membar if enough memory is left */
249 	if (vmd->current_addr_size >= size + padding) {
250 		base_address = vmd->physical_addr + padding;
251 		vmd->physical_addr += size + padding;
252 		vmd->current_addr_size -= size + padding;
253 	}
254 
255 	SPDK_DEBUGLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);
256 
257 	return base_address;
258 }
259 
260 static bool
261 vmd_is_end_device(struct vmd_pci_device *dev)
262 {
263 	return (dev && dev->header) &&
264 	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
265 }
266 
267 static void
268 vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
269 {
270 	struct vmd_pci_bus *bus;
271 	struct vmd_pci_device *bridge;
272 
273 	if (base == 0 ||  limit == 0) {
274 		return;
275 	}
276 
277 	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
278 		bus = dev->bus_object;
279 	} else {
280 		bus = dev->parent;
281 	}
282 
283 	bridge = bus->self;
284 	SPDK_DEBUGLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
285 		      bridge->header->one.mem_limit);
286 
287 	if (dev->bus->vmd->scan_completed) {
288 		return;
289 	}
290 
291 	while (bus && bus->self != NULL) {
292 		bridge = bus->self;
293 
294 		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
295 		if (bridge->header->one.mem_base > base) {
296 			bridge->header->one.mem_base = base;
297 			base = bridge->header->one.mem_base;
298 		}
299 
300 		if (bridge->header->one.mem_limit < limit) {
301 			bridge->header->one.mem_limit = limit;
302 			limit = bridge->header->one.mem_limit;
303 		}
304 
305 		bus = bus->parent;
306 	}
307 }
308 
309 static uint64_t
310 vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
311 {
312 	struct vmd_pci_bus *bus = dev->parent;
313 
314 	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
315 		return dev->header->zero.BAR[index] & ~0xf;
316 	} else {
317 		if (bus->self->hotplug_capable) {
318 			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
319 		} else {
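			/*
			 * The bridge's 16-bit memory base register holds address
			 * bits 31:20 in its upper 12 bits (1 MiB granularity) and
			 * reads back zero in its low bits, so shifting the whole
			 * register left by 16 reconstructs the 32-bit window base
			 * programmed during the initial scan.
			 */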
320 			return (uint64_t)bus->self->header->one.mem_base << 16;
321 		}
322 	}
323 }
324 
325 static bool
326 vmd_assign_base_addrs(struct vmd_pci_device *dev)
327 {
328 	uint16_t mem_base = 0, mem_limit = 0;
329 	unsigned char mem_attr = 0;
330 	int last;
331 	struct vmd_adapter *vmd = NULL;
332 	bool ret_val = false;
333 	uint32_t bar_value;
334 	uint32_t table_offset;
335 
336 	if (dev && dev->bus) {
337 		vmd = dev->bus->vmd;
338 	}
339 
340 	if (!vmd) {
		return false;
342 	}
343 
344 	vmd_align_base_addrs(vmd, ONE_MB);
345 
346 	last = dev->header_type ? 2 : 6;
347 	for (int i = 0; i < last; i++) {
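		/*
		 * Standard PCI BAR sizing: save the BAR, write all 1s, read back
		 * the size mask, then restore the original value. Bit 0 set in
		 * the restored BAR marks an I/O BAR, which is skipped below.
		 */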
348 		bar_value = dev->header->zero.BAR[i];
349 		dev->header->zero.BAR[i] = ~(0U);
350 		dev->bar[i].size = dev->header->zero.BAR[i];
351 		dev->header->zero.BAR[i] = bar_value;
352 
353 		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0  ||
354 		    dev->header->zero.BAR[i] & 1) {
355 			dev->bar[i].size = 0;
356 			continue;
357 		}
358 		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
359 		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);
360 
361 		if (vmd->scan_completed) {
362 			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
363 		} else {
364 			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
365 		}
366 
367 		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;
368 
369 		if (!dev->bar[i].start) {
370 			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
371 				i++;
372 			}
373 			continue;
374 		}
375 
376 		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
377 		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
378 			    BRIDGE_BASEREG(dev->bar[i].size - 1);
379 		if (!mem_base) {
380 			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
381 		}
382 
383 		ret_val = true;
384 
385 		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
386 			i++;
387 			if (i < last) {
388 				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
389 			}
390 		}
391 	}
392 
	/* Enable device MEM and bus mastering */
	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
	/* Read back to flush the posted write; the value itself is unused */
	uint16_t cmd = dev->header->zero.command;
	(void)cmd;
397 
398 	if (dev->msix_cap && ret_val) {
399 		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
400 		if (dev->bar[table_offset & 0x3].vaddr) {
401 			dev->msix_table = (volatile struct pci_msix_table_entry *)
402 					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
403 		}
404 	}
405 
406 	if (ret_val && vmd_is_end_device(dev)) {
407 		vmd_update_base_limit_register(dev, mem_base, mem_limit);
408 	}
409 
410 	return ret_val;
411 }
412 
413 static void
414 vmd_get_device_capabilities(struct vmd_pci_device *dev)
{
417 	volatile uint8_t *config_space;
418 	uint8_t capabilities_offset;
419 	struct pci_capabilities_header *capabilities_hdr;
420 
421 	config_space = (volatile uint8_t *)dev->header;
422 	if ((dev->header->common.status  & PCI_CAPABILITIES_LIST) == 0) {
423 		return;
424 	}
425 
426 	capabilities_offset = dev->header->zero.cap_pointer;
427 	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
428 		capabilities_offset = dev->header->one.cap_pointer;
429 	}
430 
431 	while (capabilities_offset > 0) {
432 		capabilities_hdr = (struct pci_capabilities_header *)
433 				   &config_space[capabilities_offset];
434 		switch (capabilities_hdr->capability_id) {
435 		case CAPABILITY_ID_PCI_EXPRESS:
436 			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
437 			break;
438 
439 		case CAPABILITY_ID_MSI:
440 			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
441 			break;
442 
443 		case CAPABILITY_ID_MSIX:
444 			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
445 			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
446 			break;
447 
448 		default:
449 			break;
450 		}
451 		capabilities_offset = capabilities_hdr->next;
452 	}
453 }
454 
455 static volatile struct pci_enhanced_capability_header *
456 vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
457 {
458 	uint8_t *data;
459 	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
460 	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;
461 
462 	data = (uint8_t *)dev->header;
463 	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
464 		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
465 		if (cap_hdr->capability_id == capability_id) {
466 			return cap_hdr;
467 		}
468 		cap_offset = cap_hdr->next;
469 		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
470 			break;
471 		}
472 	}
473 
474 	return NULL;
475 }
476 
477 static void
478 vmd_read_config_space(struct vmd_pci_device *dev)
479 {
	/*
	 * Writes to the pci config space are posted writes. To ensure a transaction
	 * reaches its destination before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
484 	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
485 	{ uint16_t cmd = dev->header->common.command; (void)cmd; }
486 
487 	vmd_get_device_capabilities(dev);
488 	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
489 			DEVICE_SERIAL_NUMBER_CAP_ID);
490 }
491 
492 static void
493 vmd_update_scan_info(struct vmd_pci_device *dev)
494 {
495 	struct vmd_adapter *vmd_adapter = dev->bus->vmd;
496 
497 	if (vmd_adapter->root_port_updated) {
498 		return;
499 	}
500 
501 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
502 		return;
503 	}
504 
505 	if (vmd_device_is_root_port(dev)) {
506 		vmd_adapter->root_port_updated = 1;
507 		SPDK_DEBUGLOG(vmd, "root_port_updated = %d\n",
508 			      vmd_adapter->root_port_updated);
509 		SPDK_DEBUGLOG(vmd, "upper:limit = %x : %x\n",
510 			      dev->header->one.prefetch_base_upper,
511 			      dev->header->one.prefetch_limit_upper);
512 		if (vmd_device_is_enumerated(dev)) {
513 			vmd_adapter->scan_completed = 1;
514 			SPDK_DEBUGLOG(vmd, "scan_completed = %d\n",
515 				      vmd_adapter->scan_completed);
516 		}
517 	}
518 }
519 
520 static void
521 vmd_reset_base_limit_registers(struct vmd_pci_device *dev)
522 {
523 	uint32_t reg __attribute__((unused));
524 
525 	assert(dev->header_type != PCI_HEADER_TYPE_NORMAL);
526 	/*
527 	 * Writes to the pci config space are posted writes.
528 	 * To ensure transaction reaches its destination
529 	 * before another write is posted, an immediate read
530 	 * of the written value should be performed.
531 	 */
532 	dev->header->one.mem_base = 0xfff0;
533 	reg = dev->header->one.mem_base;
534 	dev->header->one.mem_limit = 0x0;
535 	reg = dev->header->one.mem_limit;
536 	dev->header->one.prefetch_base = 0x0;
537 	reg = dev->header->one.prefetch_base;
538 	dev->header->one.prefetch_limit = 0x0;
539 	reg = dev->header->one.prefetch_limit;
540 	dev->header->one.prefetch_base_upper = 0x0;
541 	reg = dev->header->one.prefetch_base_upper;
542 	dev->header->one.prefetch_limit_upper = 0x0;
543 	reg = dev->header->one.prefetch_limit_upper;
544 	dev->header->one.io_base_upper = 0x0;
545 	reg = dev->header->one.io_base_upper;
546 	dev->header->one.io_limit_upper = 0x0;
547 	reg = dev->header->one.io_limit_upper;
548 	dev->header->one.primary = 0;
549 	reg = dev->header->one.primary;
550 	dev->header->one.secondary = 0;
551 	reg = dev->header->one.secondary;
552 	dev->header->one.subordinate = 0;
553 	reg = dev->header->one.subordinate;
554 }
555 
556 static void
557 vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
558 {
559 	struct vmd_adapter *vmd = bus->vmd;
560 	struct vmd_hot_plug *hp = &dev->hp;
561 	size_t mem_id;
562 
563 	dev->hotplug_capable = true;
564 	hp->bar.size = 1 << 20;
565 
566 	if (!vmd->scan_completed) {
567 		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
568 		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
569 		bus->self->header->one.mem_limit =
570 			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
571 	} else {
572 		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
573 	}
574 
575 	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);
576 
577 	TAILQ_INIT(&hp->free_mem_queue);
578 	TAILQ_INIT(&hp->unused_mem_queue);
579 	TAILQ_INIT(&hp->alloc_mem_queue);
580 
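	/*
	 * The entire hotplug window starts out as a single free region; the
	 * remaining descriptors are parked on the unused queue until an
	 * allocation split needs one.
	 */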
581 	hp->mem[0].size = hp->bar.size;
582 	hp->mem[0].addr = hp->bar.start;
583 
584 	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);
585 
586 	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
587 		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
588 	}
589 
590 	SPDK_DEBUGLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
591 		      bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
592 }
593 
594 static bool
595 vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
596 {
597 	volatile struct pci_header *header;
598 
599 	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
600 						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
601 	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
602 		return false;
603 	}
604 
605 	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
606 		return false;
607 	}
608 
609 	return true;
610 }
611 
612 static struct vmd_pci_device *
613 vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
614 {
615 	struct vmd_pci_device *dev = NULL;
616 	struct pci_header volatile *header;
617 	uint8_t header_type;
618 	uint32_t rev_class;
619 
620 	/* Make sure we're not creating two devices on the same dev/fn */
621 	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
622 		if (dev->devfn == devfn) {
623 			return NULL;
624 		}
625 	}
626 
627 	if (!vmd_bus_device_present(bus, devfn)) {
628 		return NULL;
629 	}
630 
	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
633 
634 	SPDK_DEBUGLOG(vmd, "PCI device found: %04x:%04x ***\n",
635 		      header->common.vendor_id, header->common.device_id);
636 
637 	dev = calloc(1, sizeof(*dev));
638 	if (!dev) {
639 		return NULL;
640 	}
641 
642 	dev->header = header;
643 	dev->vid = dev->header->common.vendor_id;
644 	dev->did = dev->header->common.device_id;
645 	dev->bus = bus;
646 	dev->parent = bus;
647 	dev->devfn = devfn;
648 	header_type = dev->header->common.header_type;
649 	rev_class = dev->header->common.rev_class;
650 	dev->class = rev_class >> 8;
651 	dev->header_type = header_type & 0x7;
652 
653 	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
654 		vmd_update_scan_info(dev);
655 		if (!dev->bus->vmd->scan_completed) {
656 			vmd_reset_base_limit_registers(dev);
657 		}
658 	}
659 
660 	vmd_read_config_space(dev);
661 
662 	return dev;
663 }
664 
665 static struct vmd_pci_bus *
666 vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
667 {
668 	struct vmd_pci_bus *new_bus;
669 
670 	new_bus = calloc(1, sizeof(*new_bus));
671 	if (!new_bus) {
672 		return NULL;
673 	}
674 
675 	new_bus->parent = parent;
676 	new_bus->domain = parent->domain;
677 	new_bus->bus_number = bus_number;
678 	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
679 	new_bus->self = bridge;
680 	new_bus->vmd = parent->vmd;
681 	new_bus->config_bus_number = new_bus->bus_number - new_bus->vmd->vmd_bus.bus_start;
682 	TAILQ_INIT(&new_bus->dev_list);
683 
684 	bridge->subordinate = new_bus;
685 
686 	bridge->pci.addr.bus = new_bus->bus_number;
687 	bridge->pci.addr.dev = bridge->devfn;
688 	bridge->pci.addr.func = 0;
689 	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;
690 
691 	return new_bus;
692 }
693 
694 /*
695  * Assigns a bus number from the list of available
696  * bus numbers. If the device is downstream of a hot plug port,
697  * assign the bus number from those assigned to the HP port. Otherwise,
698  * assign the next bus number from the vmd bus number list.
699  */
700 static uint8_t
701 vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
702 {
703 	uint8_t bus = 0xff;
704 	struct vmd_pci_bus *hp_bus;
705 
706 	if (dev) {
707 		hp_bus = vmd_is_dev_in_hotplug_path(dev);
708 		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
709 			return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
710 		}
711 	}
712 
713 	/* Device is not under a hot plug path. Return next global bus number */
714 	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
715 		bus = vmd->next_bus_number;
716 		vmd->next_bus_number++;
717 	}
718 	return bus;
719 }
720 
721 static uint8_t
722 vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
723 {
724 	uint8_t bus_number = 0xff;
725 
726 	if (dev && dev->bus && dev->bus->vmd &&
727 	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
728 		bus_number = RESERVED_HOTPLUG_BUSES;
729 		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
730 	}
731 
732 	return bus_number;
733 }
734 
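/*
 * In the MSI-X message control register, bit 15 is MSI-X Enable and bit 14 is
 * Function Mask. The sequences below mask the function before toggling the
 * enable bit, and read the register back after each write to flush it.
 */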
735 static void
736 vmd_enable_msix(struct vmd_pci_device *dev)
737 {
738 	volatile uint16_t control;
739 
740 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
741 	dev->msix_cap->message_control.as_uint16_t = control;
742 	control = dev->msix_cap->message_control.as_uint16_t;
743 	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
744 	control = dev->msix_cap->message_control.as_uint16_t;
745 	control = control & ~(1 << 14);
746 	dev->msix_cap->message_control.as_uint16_t = control;
747 	control = dev->msix_cap->message_control.as_uint16_t;
748 }
749 
750 static void
751 vmd_disable_msix(struct vmd_pci_device *dev)
752 {
753 	volatile uint16_t control;
754 
755 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
756 	dev->msix_cap->message_control.as_uint16_t = control;
757 	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
758 	dev->msix_cap->message_control.as_uint16_t = control;
759 	control = dev->msix_cap->message_control.as_uint16_t;
760 }
761 
/*
 * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for
 * the port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
 */
766 static void
767 vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
768 {
769 	int entry;
770 
771 	if (!dev || !vmdEntry || !dev->msix_cap) {
772 		return;
773 	}
774 
775 	vmd_disable_msix(dev);
776 	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
777 		return;
778 	}
779 
780 	for (entry = 0; entry < dev->msix_table_size; ++entry) {
781 		dev->msix_table[entry].vector_control = 1;
782 	}
783 	vmd_enable_msix(dev);
784 }
785 
786 static void
787 vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
788 {
789 	/* Update the subordinate bus of all bridges above this bridge */
790 	volatile struct vmd_pci_device *dev = bridge;
791 	uint8_t subordinate_bus;
792 
793 	if (!dev) {
794 		return;
795 	}
796 	subordinate_bus = bridge->header->one.subordinate;
797 	while (dev->parent_bridge != NULL) {
798 		dev = dev->parent_bridge;
799 		if (dev->header->one.subordinate < subordinate_bus) {
800 			dev->header->one.subordinate = subordinate_bus;
801 			subordinate_bus = dev->header->one.subordinate;
802 		}
803 	}
804 }
805 
806 static bool
807 vmd_is_supported_device(struct vmd_pci_device *dev)
808 {
809 	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
810 }
811 
812 static int
813 vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
814 		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
815 {
816 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);
817 
818 	*size = dev->bar[bar].size;
819 	*phys_addr = dev->bar[bar].start;
820 	*mapped_addr = (void *)dev->bar[bar].vaddr;
821 
822 	return 0;
823 }
824 
825 static int
826 vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
827 {
828 	return 0;
829 }
830 
831 static int
832 vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
833 		 uint32_t offset)
834 {
835 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
836 	volatile uint8_t *src = (volatile uint8_t *)dev->header;
837 	uint8_t *dst = value;
838 	size_t i;
839 
840 	if (len + offset > PCI_MAX_CFG_SIZE) {
841 		return -1;
842 	}
843 
844 	for (i = 0; i < len; ++i) {
845 		dst[i] = src[offset + i];
846 	}
847 
848 	return 0;
849 }
850 
851 static int
852 vmd_dev_cfg_write(struct spdk_pci_device *_dev,  void *value,
853 		  uint32_t len, uint32_t offset)
854 {
855 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
856 	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
857 	uint8_t *src = value;
858 	size_t i;
859 
860 	if ((len + offset) > PCI_MAX_CFG_SIZE) {
861 		return -1;
862 	}
863 
864 	for (i = 0; i < len; ++i) {
865 		dst[offset + i] = src[i];
866 	}
867 
868 	return 0;
869 }
870 
871 static void
872 vmd_dev_detach(struct spdk_pci_device *dev)
873 {
874 	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
875 	struct vmd_pci_device *bus_device = vmd_device->bus->self;
876 	struct vmd_pci_bus *bus = vmd_device->bus;
877 	size_t i, num_bars = vmd_device->header_type ? 2 : 6;
878 
879 	spdk_pci_unhook_device(dev);
880 	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);
881 
	/* Release the hotplug region if the device is under a hotplug-capable bus */
883 	if (bus_device && bus_device->hotplug_capable) {
884 		for (i = 0; i < num_bars; ++i) {
885 			if (vmd_device->bar[i].start != 0) {
886 				vmd_hotplug_free_addr(&bus_device->hp, vmd_device->bar[i].start);
887 			}
888 		}
889 	}
890 
891 	free(dev);
892 }
893 
894 static void
895 vmd_dev_init(struct vmd_pci_device *dev)
896 {
	char bdf[32];
898 
899 	dev->pci.addr.domain = dev->bus->vmd->domain;
900 	dev->pci.addr.bus = dev->bus->bus_number;
901 	dev->pci.addr.dev = dev->devfn;
902 	dev->pci.addr.func = 0;
903 	dev->pci.socket_id = spdk_pci_device_get_socket_id(dev->bus->vmd->pci);
904 	dev->pci.id.vendor_id = dev->header->common.vendor_id;
905 	dev->pci.id.device_id = dev->header->common.device_id;
906 	dev->pci.type = "vmd";
907 	dev->pci.map_bar = vmd_dev_map_bar;
908 	dev->pci.unmap_bar = vmd_dev_unmap_bar;
909 	dev->pci.cfg_read = vmd_dev_cfg_read;
910 	dev->pci.cfg_write = vmd_dev_cfg_write;
911 	dev->hotplug_capable = false;
912 	if (dev->pcie_cap != NULL) {
913 		dev->cached_slot_control = dev->pcie_cap->slot_control;
914 	}
915 
916 	if (vmd_is_supported_device(dev)) {
917 		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
918 		SPDK_DEBUGLOG(vmd, "Initializing NVMe device at %s\n", bdf);
919 		dev->pci.parent = dev->bus->vmd->pci;
920 		spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
921 	}
922 }
923 
/*
 * Scans a single bus for all attached devices and returns a count of how
 * many devices were found. In the VMD topology, it is assumed there are no
 * multi-function devices; hence a bus (bridge) will not mix functions with
 * both type 0 and type 1 headers.
 *
 * Another option for implementing this function is to make the bus an int and
 * create a new PciBridge device. PciBridge would inherit from PciDevice with
 * extra fields for the sub/pri/sec bus. The input then becomes PciPort, bus
 * number, and parent_bridge.
 *
 * The bus number is scanned and, if a device is found, either a PciBridge
 * (type 1) or a PciDevice (type 0) is created based on the header type.
 *
 * For a PciBridge, bus numbers are assigned and the new bus is rescanned; the
 * PciBridge currently being scanned becomes the parent_bridge passed in with
 * the new bus number.
 *
 * The linked list becomes a list of PciBridges with PciDevices attached.
 *
 * Returns a count of how many devices were found (type 1 + type 0 header devices).
 */
944 static uint8_t
945 vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge)
946 {
947 	/* assuming only single function devices are on the bus */
948 	struct vmd_pci_device *new_dev;
949 	struct vmd_adapter *vmd;
950 	union express_slot_capabilities_register slot_cap;
951 	struct vmd_pci_bus *new_bus;
952 	uint8_t  device_number, dev_cnt = 0;
953 	uint8_t new_bus_num;
954 
955 	for (device_number = 0; device_number < 32; device_number++) {
956 		new_dev = vmd_alloc_dev(bus, device_number);
957 		if (new_dev == NULL) {
958 			continue;
959 		}
960 
961 		dev_cnt++;
962 		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
963 			slot_cap.as_uint32_t = 0;
964 			if (new_dev->pcie_cap != NULL) {
965 				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
966 			}
967 
968 			new_bus_num = vmd_get_next_bus_number(bus->vmd->is_hotplug_scan ? new_dev : NULL, bus->vmd);
969 			if (new_bus_num == 0xff) {
970 				free(new_dev);
971 				return dev_cnt;
972 			}
973 			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
974 			if (!new_bus) {
975 				free(new_dev);
976 				return dev_cnt;
977 			}
978 			new_bus->primary_bus = bus->secondary_bus;
979 			new_bus->self = new_dev;
980 			new_dev->bus_object = new_bus;
981 
982 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
983 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
984 				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
985 				new_bus->subordinate_bus += new_bus->hotplug_buses;
986 
				/* Attach a hot plug instance if HP is supported */
				/* Hot-inserted SSDs can be assigned the port bus of subordinate + 1 */
989 				SPDK_DEBUGLOG(vmd, "hotplug_capable/slot_implemented = "
990 					      "%x:%x\n", slot_cap.bit_field.hotplug_capable,
991 					      new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
992 			}
993 
994 			new_dev->parent_bridge = parent_bridge;
995 			new_dev->header->one.primary = new_bus->primary_bus;
996 			new_dev->header->one.secondary = new_bus->secondary_bus;
997 			new_dev->header->one.subordinate = new_bus->subordinate_bus;
998 
999 			vmd_bus_update_bridge_info(new_dev);
1000 			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);
1001 
1002 			vmd_dev_init(new_dev);
1003 
1004 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
1005 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1006 				vmd_init_hotplug(new_dev, new_bus);
1007 			}
1008 
1009 			dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
1010 			if (new_dev->pcie_cap != NULL) {
1011 				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
1012 					return dev_cnt;
1013 				}
1014 			}
1015 		} else {
1016 			/* Attach the device to the current bus and assign base addresses */
1017 			TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
1018 			g_end_device_count++;
1019 			if (vmd_assign_base_addrs(new_dev)) {
1020 				vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
1021 				vmd_dev_init(new_dev);
1022 				if (vmd_is_supported_device(new_dev)) {
1023 					vmd = bus->vmd;
1024 					vmd->target[vmd->nvme_count] = new_dev;
1025 					vmd->nvme_count++;
1026 				}
1027 			} else {
1028 				SPDK_DEBUGLOG(vmd, "Removing failed device:%p\n", new_dev);
1029 				TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
1030 				free(new_dev);
1031 				if (dev_cnt) {
1032 					dev_cnt--;
1033 				}
1034 			}
1035 		}
1036 	}
1037 
1038 	return dev_cnt;
1039 }
1040 
1041 static void
1042 vmd_print_pci_info(struct vmd_pci_device *dev)
1043 {
1044 	if (!dev) {
1045 		return;
1046 	}
1047 
1048 	if (dev->pcie_cap != NULL) {
1049 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
1050 			     dev->header->common.vendor_id, dev->header->common.device_id,
1051 			     dev->pcie_cap->express_cap_register.bit_field.device_type,
1052 			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
1053 	} else {
1054 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
1055 			     dev->header->common.vendor_id, dev->header->common.device_id);
1056 	}
1057 
1058 	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
1059 		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);
1060 
1061 	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
1062 		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
1063 			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
1064 	}
1065 
1066 	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
1067 		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
1068 			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
1069 		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1070 			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
1071 			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
1072 				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
1073 			}
1074 		}
1075 	}
1076 
1077 	if (dev->sn_cap != NULL) {
1078 		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
1079 		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;
1080 
1081 		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
1082 			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
1083 	}
1084 }
1085 
1086 static void
1087 vmd_cache_scan_info(struct vmd_pci_device *dev)
1088 {
1089 	uint32_t reg __attribute__((unused));
1090 
1091 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
1092 		return;
1093 	}
1094 
1095 	SPDK_DEBUGLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
1096 		      dev->header->common.device_id);
1097 
1098 	if (vmd_device_is_root_port(dev)) {
1099 		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
1100 		reg = dev->header->one.prefetch_base_upper;
1101 		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
1102 		reg = dev->header->one.prefetch_limit_upper;
1103 
1104 		SPDK_DEBUGLOG(vmd, "prefetch: %x:%x\n",
1105 			      dev->header->one.prefetch_base_upper,
1106 			      dev->header->one.prefetch_limit_upper);
1107 	}
1108 }
1109 
1110 static uint8_t
1111 vmd_scan_pcibus(struct vmd_pci_bus *bus)
1112 {
1113 	struct vmd_pci_bus *bus_entry;
1114 	struct vmd_pci_device *dev;
1115 	uint8_t dev_cnt;
1116 
1117 	g_end_device_count = 0;
1118 	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
1119 	bus->vmd->next_bus_number = bus->bus_number + 1;
1120 	dev_cnt = vmd_scan_single_bus(bus, NULL);
1121 
1122 	SPDK_DEBUGLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
1123 	SPDK_DEBUGLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);
1124 
1125 	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
1126 		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
1127 		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);
1128 
1129 	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
1130 		if (bus_entry->self != NULL) {
1131 			vmd_print_pci_info(bus_entry->self);
1132 			vmd_cache_scan_info(bus_entry->self);
1133 		}
1134 
1135 		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
1136 			vmd_print_pci_info(dev);
1137 		}
1138 	}
1139 
1140 	return dev_cnt;
1141 }
1142 
1143 static int
1144 vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
1145 {
1146 	int rc;
1147 
1148 	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
1149 				     &vmd->cfgbar, &vmd->cfgbar_size);
1150 	if (rc == 0) {
1151 		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
1152 					     &vmd->membar, &vmd->membar_size);
1153 	}
1154 
1155 	if (rc == 0) {
1156 		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
1157 					     &vmd->msixbar, &vmd->msixbar_size);
1158 	}
1159 
1160 	if (rc == 0) {
1161 		vmd->physical_addr = vmd->membar;
1162 		vmd->current_addr_size = vmd->membar_size;
1163 	}
1164 	return rc;
1165 }
1166 
1167 static void
1168 vmd_set_starting_bus_number(struct vmd_adapter *vmd, uint8_t *bus_start,
1169 			    uint8_t *max_bus)
1170 {
1171 	uint32_t vmd_cap = 0, vmd_config = 0;
1172 	uint8_t bus_restrict_cap, bus_restrictions;
1173 
1174 	spdk_pci_device_cfg_read32(vmd->pci, &vmd_cap, PCI_VMD_VMCAP);
1175 	spdk_pci_device_cfg_read32(vmd->pci, &vmd_config, PCI_VMD_VMCONFIG);
1176 
1177 	bus_restrict_cap = vmd_cap & 0x1; /* bit 0 */
1178 	bus_restrictions = (vmd_config >> 8) & 0x3; /* bits 8-9 */
1179 	if ((bus_restrict_cap == 0x1) && (bus_restrictions == 0x1)) {
1180 		*bus_start = 128;
1181 		*max_bus = 255;
1182 	} else {
1183 		*bus_start = 0;
1184 		*max_bus = 127;
1185 	}
1186 }
1187 
1188 static int
1189 vmd_enumerate_devices(struct vmd_adapter *vmd)
1190 {
1191 	uint8_t max_bus, bus_start;
1192 
1193 	vmd->vmd_bus.vmd = vmd;
1194 	vmd->vmd_bus.domain = vmd->pci->addr.domain;
1195 
1196 	if (vmd->pci->id.device_id == PCI_DEVICE_ID_INTEL_VMD_ICX) {
1197 		vmd_set_starting_bus_number(vmd, &bus_start, &max_bus);
1198 		vmd->vmd_bus.bus_start = bus_start;
1199 		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = vmd->vmd_bus.bus_start;
1200 		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = vmd->vmd_bus.bus_start;
1201 		vmd->max_pci_bus = max_bus;
1202 	} else {
1203 		vmd->vmd_bus.bus_start = 0;
1204 		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
1205 		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
1206 		vmd->max_pci_bus = PCI_MAX_BUS_NUMBER;
1207 	}
1208 
1209 	return vmd_scan_pcibus(&vmd->vmd_bus);
1210 }
1211 
1212 struct vmd_pci_device *
1213 vmd_find_device(const struct spdk_pci_addr *addr)
1214 {
1215 	struct vmd_pci_bus *bus;
1216 	struct vmd_pci_device *dev;
1217 	int i;
1218 
1219 	for (i = 0; i < MAX_VMD_TARGET; ++i) {
1220 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1221 			if (bus->self) {
1222 				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
1223 					return bus->self;
1224 				}
1225 			}
1226 
1227 			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1228 				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
1229 					return dev;
1230 				}
1231 			}
1232 		}
1233 	}
1234 
1235 	return NULL;
1236 }
1237 
1238 static int
1239 vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
1240 {
1241 	uint32_t cmd_reg = 0;
1242 	char bdf[32] = {0};
1243 	struct vmd_container *vmd_c = ctx;
1244 	size_t i;
1245 
1246 	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
1247 	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
1248 	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);
1249 
1250 	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
1251 	SPDK_DEBUGLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);
1252 
1253 	/* map vmd bars */
1254 	i = vmd_c->count;
1255 	vmd_c->vmd[i].pci = pci_dev;
1256 	vmd_c->vmd[i].vmd_index = i;
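	/*
	 * Pack the VMD endpoint's own bus/device/function into a synthetic
	 * domain, presumably so that devices behind different VMD adapters
	 * end up in distinct, non-clashing PCI domains.
	 */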
1257 	vmd_c->vmd[i].domain =
1258 		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
1259 	TAILQ_INIT(&vmd_c->vmd[i].bus_list);
1260 
1261 	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
1262 		return -1;
1263 	}
1264 
1265 	SPDK_DEBUGLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
1266 		      (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
1267 		      (uint32_t)vmd_c->vmd[i].cfgbar_size);
1268 	SPDK_DEBUGLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
1269 		      (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
1270 		      (uint32_t)vmd_c->vmd[i].membar_size);
1271 	SPDK_DEBUGLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
1272 		      (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
1273 		      (uint32_t)vmd_c->vmd[i].msixbar_size);
1274 
1275 	vmd_c->count = i + 1;
1276 
1277 	vmd_enumerate_devices(&vmd_c->vmd[i]);
1278 
1279 	return 0;
1280 }
1281 
1282 int
1283 spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
1284 {
1285 	int cnt = 0;
1286 	struct vmd_pci_bus *bus;
1287 	struct vmd_pci_device *dev;
1288 
1289 	if (!nvme_list) {
1290 		return -1;
1291 	}
1292 
1293 	for (int i = 0; i < MAX_VMD_TARGET; ++i) {
1294 		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
1295 			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1296 				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1297 					nvme_list[cnt++] = dev->pci;
1298 					if (!dev->is_hooked) {
1299 						vmd_dev_init(dev);
1300 						dev->is_hooked = 1;
1301 					}
1302 				}
1303 			}
1304 		}
1305 	}
1306 
1307 	return cnt;
1308 }
1309 
1310 static void
1311 vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
1312 {
1313 	struct vmd_pci_device *device = bus->self;
1314 	uint16_t status __attribute__((unused));
1315 
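	/*
	 * The slot and link status bits are RW1C (write 1 to clear): reading
	 * the register and writing the same value back clears whatever was
	 * set, and the trailing read flushes the posted write.
	 */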
1316 	status = device->pcie_cap->slot_status.as_uint16_t;
1317 	device->pcie_cap->slot_status.as_uint16_t = status;
1318 	status = device->pcie_cap->slot_status.as_uint16_t;
1319 
1320 	status = device->pcie_cap->link_status.as_uint16_t;
1321 	device->pcie_cap->link_status.as_uint16_t = status;
1322 	status = device->pcie_cap->link_status.as_uint16_t;
1323 }
1324 
1325 static void
1326 vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
1327 {
1328 	uint8_t num_devices, sleep_count;
1329 
1330 	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
1331 		/* Scan until a new device is found */
1332 		num_devices = vmd_scan_single_bus(bus, bus->self);
1333 		if (num_devices > 0) {
1334 			break;
1335 		}
1336 
1337 		spdk_delay_us(200000);
1338 	}
1339 
1340 	if (num_devices == 0) {
1341 		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
1342 	}
1343 }
1344 
1345 static void
1346 vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
1347 {
1348 	struct vmd_pci_device *device, *tmpdev;
1349 
1350 	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
1351 		if (!vmd_bus_device_present(bus, device->devfn)) {
1352 			device->pci.internal.pending_removal = true;
1353 
1354 			/* If the device isn't attached, remove it immediately */
1355 			if (!device->pci.internal.attached) {
1356 				vmd_dev_detach(&device->pci);
1357 			}
1358 		}
1359 	}
1360 }
1361 
1362 int
1363 spdk_vmd_hotplug_monitor(void)
1364 {
1365 	struct vmd_pci_bus *bus;
1366 	struct vmd_pci_device *device;
1367 	int num_hotplugs = 0;
1368 	uint32_t i;
1369 
1370 	for (i = 0; i < g_vmd_container.count; ++i) {
1371 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1372 			device = bus->self;
1373 			if (device == NULL || !device->hotplug_capable) {
1374 				continue;
1375 			}
1376 
1377 			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
1378 				continue;
1379 			}
1380 
1381 			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
1382 				SPDK_DEBUGLOG(vmd, "Device hotplug detected on bus "
1383 					      "%"PRIu32"\n", bus->bus_number);
1384 				vmd_bus_handle_hotplug(bus);
1385 			} else {
1386 				SPDK_DEBUGLOG(vmd, "Device hotremove detected on bus "
1387 					      "%"PRIu32"\n", bus->bus_number);
1388 				vmd_bus_handle_hotremove(bus);
1389 			}
1390 
1391 			vmd_clear_hotplug_status(bus);
1392 			num_hotplugs++;
1393 		}
1394 	}
1395 
1396 	return num_hotplugs;
1397 }
1398 
1399 int
1400 spdk_vmd_init(void)
1401 {
1402 	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
1403 }
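
/*
 * Illustrative usage sketch for the public API above (hypothetical caller,
 * not part of this file; the address and list size are made up):
 *
 *	struct spdk_pci_device nvme_list[32];
 *	struct spdk_pci_addr vmd_addr;
 *	int cnt;
 *
 *	spdk_pci_addr_parse(&vmd_addr, "0000:5d:05.5");
 *	if (spdk_vmd_init() == 0) {
 *		cnt = spdk_vmd_pci_device_list(vmd_addr, nvme_list);
 *		// the first cnt entries are NVMe devices behind that VMD
 *	}
 */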
1404 
1405 void
1406 spdk_vmd_fini(void)
1407 {
1408 	uint32_t i;
1409 
1410 	for (i = 0; i < g_vmd_container.count; ++i) {
1411 		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
1412 	}
1413 }
1414 
1415 SPDK_LOG_REGISTER_COMPONENT(vmd)
1416