/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "vmd.h"

#include "spdk/stdinc.h"
#include "spdk/likely.h"

static const char *device_type[] = {
	"PCI Express Endpoint",
	"Legacy PCI Express Endpoint",
	"Reserved 1",
	"Reserved 2",
	"Root Port of PCI Express Root Complex",
	"Upstream Port of PCI Express Switch",
	"Downstream Port of PCI Express Switch",
	"PCI Express to PCI/PCI-X Bridge",
	"PCI/PCI-X to PCI Express Bridge",
	"Root Complex Integrated Endpoint",
	"Root Complex Event Collector",
	"Reserved Capability"
};

/*
 * Container for all VMD adapters probed in the system.
 */
struct vmd_container {
	uint32_t count;
	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
};

static struct vmd_container g_vmd_container;
static uint8_t g_end_device_count;

static bool
vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
{
	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
}

static void
vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
{
	uint32_t pad;

	/*
	 * The device is not in the hot plug path; align the base address remaining from membar 1.
	 */
	if (vmd->physical_addr & (alignment - 1)) {
		pad = alignment - (vmd->physical_addr & (alignment - 1));
		vmd->physical_addr += pad;
		vmd->current_addr_size -= pad;
	}
}
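
/*
 * Worked example of the padding arithmetic above (illustrative values):
 * with physical_addr = 0x10080000 and alignment = ONE_MB (0x100000),
 * pad = 0x100000 - (0x10080000 & 0xfffff) = 0x80000, so physical_addr
 * becomes 0x10100000 and current_addr_size shrinks by 0x80000.
 */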

static bool
vmd_device_is_enumerated(const struct vmd_pci_device *vmd_device)
{
	return vmd_device->header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
	       vmd_device->header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
}

static bool
vmd_device_is_root_port(const struct vmd_pci_device *vmd_device)
{
	return vmd_device->header->common.vendor_id == SPDK_PCI_VID_INTEL &&
	       (vmd_device->header->common.device_id == PCI_ROOT_PORT_A_INTEL_SKX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_B_INTEL_SKX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_C_INTEL_SKX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_D_INTEL_SKX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_A_INTEL_ICX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_B_INTEL_ICX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_C_INTEL_ICX ||
		vmd_device->header->common.device_id == PCI_ROOT_PORT_D_INTEL_ICX);
}

static void
vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
{
	struct pci_mem_mgr *region, *prev;

	/* Repeatedly merge the first pair of physically adjacent free regions
	 * until no adjacent pair remains. */
	do {
		prev = NULL;
		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
				break;
			}

			prev = region;
		}

		if (region != NULL) {
			prev->size += region->size;
			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
		}
	} while (region != NULL);
}
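
/*
 * Example (illustrative): with a free queue of
 *   { addr 0x1000, size 0x1000 }, { addr 0x2000, size 0x1000 }, { addr 0x4000, size 0x1000 }
 * the first two regions are adjacent (0x1000 + 0x1000 == 0x2000), so they are
 * merged into { addr 0x1000, size 0x2000 } and the freed descriptor is moved to
 * the unused queue; the region at 0x4000 is left as-is.
 */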

static void
vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
{
	struct pci_mem_mgr *current, *prev = NULL;

	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);

	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
		if (current->addr > region->addr) {
			break;
		}

		prev = current;
	}

	if (prev != NULL) {
		assert(prev->addr + prev->size <= region->addr);
		assert(current == NULL || (region->addr + region->size <= current->addr));
		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
	} else {
		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
	}

	vmd_hotplug_coalesce_regions(hp);
}

static void
vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
{
	struct pci_mem_mgr *region;

	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
		if (region->addr == addr) {
			break;
		}
	}

	assert(region != NULL);
	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);

	vmd_hotplug_free_region(hp, region);
}

static uint64_t
vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
{
	struct pci_mem_mgr *region = NULL, *free_region;

	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
		if (region->size >= size) {
			break;
		}
	}

	if (region == NULL) {
		SPDK_DEBUGLOG(vmd, "Unable to find free hotplug memory region of size:"
			      "%"PRIx32"\n", size);
		return 0;
	}

	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
	if (size < region->size) {
		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
		if (free_region == NULL) {
			SPDK_DEBUGLOG(vmd, "Unable to find unused descriptor to store the "
				      "free region of size: %"PRIu32"\n", region->size - size);
		} else {
			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
			free_region->size = region->size - size;
			free_region->addr = region->addr + size;
			region->size = size;
			vmd_hotplug_free_region(hp, free_region);
		}
	}

	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);

	return region->addr;
}
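
/*
 * Usage sketch (illustrative; assumes an initialized struct vmd_hot_plug *hp):
 *
 *	uint64_t addr = vmd_hotplug_allocate_base_addr(hp, 0x4000);
 *	if (addr != 0) {
 *		... program a BAR with addr ...
 *		vmd_hotplug_free_addr(hp, addr);   // returns the region and coalesces
 *	}
 *
 * When the request is smaller than the first-fit region, the tail of that
 * region is put back on the free queue using a descriptor taken from the
 * unused queue.
 */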

/*
 *  Allocates an address from the vmd membar for the input memory size.
 *  vmd - vmd adapter object
 *  dev - vmd_pci_device to allocate a base address for.
 *  size - size of the memory window requested.
 *  Size must be a power of 2. Addresses are returned on the size boundary.
 *  Returns a physical address within the VMD membar window, or 0x0 if the window
 *  cannot be allocated. Consider increasing the size of the vmd membar if 0x0 is returned.
 */
static uint64_t
vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
{
	uint64_t base_address = 0, padding = 0;
	struct vmd_pci_bus *hp_bus;

	if (size && ((size & (~size + 1)) != size)) {
		return base_address;
	}

	/*
	 *  If the device is downstream of a hot plug port, allocate the address from the
	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to
	 *  determine whether a free range exists that satisfies the request. If a free range
	 *  cannot be found, get a buffer from the unused chunk. A first-fit algorithm is used.
	 */
	if (dev) {
		hp_bus = dev->parent;
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
		}
	}

	/* Ensure the physical membar allocated is size aligned */
	if (vmd->physical_addr & (size - 1)) {
		padding = size - (vmd->physical_addr & (size - 1));
	}

	/* Allocate from the membar if enough memory is left */
	if (vmd->current_addr_size >= size + padding) {
		base_address = vmd->physical_addr + padding;
		vmd->physical_addr += size + padding;
		vmd->current_addr_size -= size + padding;
	}

	SPDK_DEBUGLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);

	return base_address;
}
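
/*
 * Note on the power-of-two check above (illustrative): (size & (~size + 1))
 * isolates the lowest set bit of size, so it equals size only when exactly one
 * bit is set. E.g. size = 0x6000 yields 0x2000 != 0x6000 and is rejected, while
 * size = 0x4000 yields 0x4000 and is accepted.
 */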

static bool
vmd_is_end_device(struct vmd_pci_device *dev)
{
	return (dev && dev->header) &&
	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
}

static void
vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *bridge;

	if (base == 0 || limit == 0) {
		return;
	}

	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
		bus = dev->bus_object;
	} else {
		bus = dev->parent;
	}

	bridge = bus->self;
	SPDK_DEBUGLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
		      bridge->header->one.mem_limit);

	if (dev->bus->vmd->scan_completed) {
		return;
	}

	while (bus && bus->self != NULL) {
		bridge = bus->self;

		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
		if (bridge->header->one.mem_base > base) {
			bridge->header->one.mem_base = base;
			base = bridge->header->one.mem_base;
		}

		if (bridge->header->one.mem_limit < limit) {
			bridge->header->one.mem_limit = limit;
			limit = bridge->header->one.mem_limit;
		}

		bus = bus->parent;
	}
}

static uint64_t
vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
{
	struct vmd_pci_bus *bus = dev->parent;

	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
		return dev->header->zero.BAR[index] & ~0xf;
	} else {
		if (bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
		} else {
			return (uint64_t)bus->self->header->one.mem_base << 16;
		}
	}
}

static bool
vmd_assign_base_addrs(struct vmd_pci_device *dev)
{
	uint16_t mem_base = 0, mem_limit = 0;
	unsigned char mem_attr = 0;
	int last;
	struct vmd_adapter *vmd = NULL;
	bool ret_val = false;
	uint32_t bar_value;
	uint32_t table_offset;

	if (dev && dev->bus) {
		vmd = dev->bus->vmd;
	}

	if (!vmd) {
		return false;
	}

	vmd_align_base_addrs(vmd, ONE_MB);

	last = dev->header_type ? 2 : 6;
	for (int i = 0; i < last; i++) {
		bar_value = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = ~(0U);
		dev->bar[i].size = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = bar_value;

		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0 ||
		    dev->header->zero.BAR[i] & 1) {
			dev->bar[i].size = 0;
			continue;
		}
		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);

		if (vmd->scan_completed) {
			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
		} else {
			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
		}

		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;

		if (!dev->bar[i].start) {
			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
				i++;
			}
			continue;
		}

		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
			    BRIDGE_BASEREG(dev->bar[i].size - 1);
		if (!mem_base) {
			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
		}

		ret_val = true;

		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
			i++;
			if (i < last) {
				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
			}
		}
	}

	/* Enable device MEM and bus mastering */
	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
	/* Read back the posted write so it completes before we continue */
	{ uint16_t cmd = dev->header->zero.command; (void)cmd; }

	if (dev->msix_cap && ret_val) {
		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
		if (dev->bar[table_offset & 0x3].vaddr) {
			dev->msix_table = (volatile struct pci_msix_table_entry *)
					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
		}
	}

	if (ret_val && vmd_is_end_device(dev)) {
		vmd_update_base_limit_register(dev, mem_base, mem_limit);
	}

	return ret_val;
}

static void
vmd_get_device_capabilities(struct vmd_pci_device *dev)
{
	volatile uint8_t *config_space;
	uint8_t capabilities_offset;
	struct pci_capabilities_header *capabilities_hdr;

	config_space = (volatile uint8_t *)dev->header;
	if ((dev->header->common.status & PCI_CAPABILITIES_LIST) == 0) {
		return;
	}

	capabilities_offset = dev->header->zero.cap_pointer;
	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
		capabilities_offset = dev->header->one.cap_pointer;
	}

	while (capabilities_offset > 0) {
		capabilities_hdr = (struct pci_capabilities_header *)
				   &config_space[capabilities_offset];
		switch (capabilities_hdr->capability_id) {
		case CAPABILITY_ID_PCI_EXPRESS:
			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
			break;

		case CAPABILITY_ID_MSI:
			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
			break;

		case CAPABILITY_ID_MSIX:
			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
			break;

		default:
			break;
		}
		capabilities_offset = capabilities_hdr->next;
	}
}

static volatile struct pci_enhanced_capability_header *
vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
{
	uint8_t *data;
	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;

	data = (uint8_t *)dev->header;
	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
		if (cap_hdr->capability_id == capability_id) {
			return cap_hdr;
		}
		cap_offset = cap_hdr->next;
		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
			break;
		}
	}

	return NULL;
}

static void
vmd_read_config_space(struct vmd_pci_device *dev)
{
	/*
	 * Writes to the pci config space are posted writes. To ensure the transaction reaches
	 * its destination before another write is posted, an immediate read of the written
	 * value should be performed.
	 */
	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
	{ uint16_t cmd = dev->header->common.command; (void)cmd; }

	vmd_get_device_capabilities(dev);
	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
			DEVICE_SERIAL_NUMBER_CAP_ID);
}
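
/*
 * The read-back idiom used above, as a general sketch (illustrative):
 *
 *	dev->header->common.command |= SOME_BITS;	// posted write
 *	(void)dev->header->common.command;		// read flushes the write
 *
 * Because reads do not pass writes in PCI ordering, reading the register back
 * forces the posted write to complete before execution continues.
 */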

static void
vmd_update_scan_info(struct vmd_pci_device *dev)
{
	struct vmd_adapter *vmd_adapter = dev->bus->vmd;

	if (vmd_adapter->root_port_updated) {
		return;
	}

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	if (vmd_device_is_root_port(dev)) {
		vmd_adapter->root_port_updated = 1;
		SPDK_DEBUGLOG(vmd, "root_port_updated = %d\n",
			      vmd_adapter->root_port_updated);
		SPDK_DEBUGLOG(vmd, "upper:limit = %x : %x\n",
			      dev->header->one.prefetch_base_upper,
			      dev->header->one.prefetch_limit_upper);
		if (vmd_device_is_enumerated(dev)) {
			vmd_adapter->scan_completed = 1;
			SPDK_DEBUGLOG(vmd, "scan_completed = %d\n",
				      vmd_adapter->scan_completed);
		}
	}
}

static void
vmd_reset_base_limit_registers(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	assert(dev->header_type != PCI_HEADER_TYPE_NORMAL);
	/*
	 * Writes to the pci config space are posted writes.
	 * To ensure the transaction reaches its destination
	 * before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
	dev->header->one.mem_base = 0xfff0;
	reg = dev->header->one.mem_base;
	dev->header->one.mem_limit = 0x0;
	reg = dev->header->one.mem_limit;
	dev->header->one.prefetch_base = 0x0;
	reg = dev->header->one.prefetch_base;
	dev->header->one.prefetch_limit = 0x0;
	reg = dev->header->one.prefetch_limit;
	dev->header->one.prefetch_base_upper = 0x0;
	reg = dev->header->one.prefetch_base_upper;
	dev->header->one.prefetch_limit_upper = 0x0;
	reg = dev->header->one.prefetch_limit_upper;
	dev->header->one.io_base_upper = 0x0;
	reg = dev->header->one.io_base_upper;
	dev->header->one.io_limit_upper = 0x0;
	reg = dev->header->one.io_limit_upper;
	dev->header->one.primary = 0;
	reg = dev->header->one.primary;
	dev->header->one.secondary = 0;
	reg = dev->header->one.secondary;
	dev->header->one.subordinate = 0;
	reg = dev->header->one.subordinate;
}

static void
vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
{
	struct vmd_adapter *vmd = bus->vmd;
	struct vmd_hot_plug *hp = &dev->hp;
	size_t mem_id;

	dev->hotplug_capable = true;
	hp->bar.size = 1 << 20;

	if (!vmd->scan_completed) {
		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
		bus->self->header->one.mem_limit =
			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
	} else {
		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
	}

	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);

	TAILQ_INIT(&hp->free_mem_queue);
	TAILQ_INIT(&hp->unused_mem_queue);
	TAILQ_INIT(&hp->alloc_mem_queue);

	hp->mem[0].size = hp->bar.size;
	hp->mem[0].addr = hp->bar.start;

	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);

	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
	}

	SPDK_DEBUGLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
		      bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
}

static bool
vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
{
	volatile struct pci_header *header;

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
		return false;
	}

	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
		return false;
	}

	return true;
}

static struct vmd_pci_device *
vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
{
	struct vmd_pci_device *dev = NULL;
	struct pci_header volatile *header;
	uint8_t header_type;
	uint32_t rev_class;

	/* Make sure we're not creating two devices on the same dev/fn */
	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
		if (dev->devfn == devfn) {
			return NULL;
		}
	}

	if (!vmd_bus_device_present(bus, devfn)) {
		return NULL;
	}

	header = (struct pci_header * volatile)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));

	SPDK_DEBUGLOG(vmd, "PCI device found: %04x:%04x ***\n",
		      header->common.vendor_id, header->common.device_id);

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return NULL;
	}

	dev->header = header;
	dev->vid = dev->header->common.vendor_id;
	dev->did = dev->header->common.device_id;
	dev->bus = bus;
	dev->parent = bus;
	dev->devfn = devfn;
	header_type = dev->header->common.header_type;
	rev_class = dev->header->common.rev_class;
	dev->class = rev_class >> 8;
	dev->header_type = header_type & 0x7;

	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
		vmd_update_scan_info(dev);
		if (!dev->bus->vmd->scan_completed) {
			vmd_reset_base_limit_registers(dev);
		}
	}

	vmd_read_config_space(dev);

	return dev;
}

static struct vmd_pci_bus *
vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
{
	struct vmd_pci_bus *new_bus;

	new_bus = calloc(1, sizeof(*new_bus));
	if (!new_bus) {
		return NULL;
	}

	new_bus->parent = parent;
	new_bus->domain = parent->domain;
	new_bus->bus_number = bus_number;
	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
	new_bus->self = bridge;
	new_bus->vmd = parent->vmd;
	new_bus->config_bus_number = new_bus->bus_number - new_bus->vmd->vmd_bus.bus_start;
	TAILQ_INIT(&new_bus->dev_list);

	bridge->subordinate = new_bus;

	bridge->pci.addr.bus = new_bus->bus_number;
	bridge->pci.addr.dev = bridge->devfn;
	bridge->pci.addr.func = 0;
	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;

	return new_bus;
}

/*
 * Assigns a bus number from the list of available
 * bus numbers. If the device is downstream of a hot plug port,
 * assign the bus number from those assigned to the HP port. Otherwise,
 * assign the next bus number from the vmd bus number list.
 */
static uint8_t
vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
{
	uint8_t bus = 0xff;
	struct vmd_pci_bus *hp_bus;

	if (dev) {
		hp_bus = vmd_is_dev_in_hotplug_path(dev);
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
		}
	}

	/* The device is not under a hot plug path. Return the next global bus number */
	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
		bus = vmd->next_bus_number;
		vmd->next_bus_number++;
	}
	return bus;
}

static uint8_t
vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
{
	uint8_t bus_number = 0xff;

	if (dev && dev->bus && dev->bus->vmd &&
	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
		bus_number = RESERVED_HOTPLUG_BUSES;
		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
	}

	return bus_number;
}

static void
vmd_enable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	/* Mask the function (bit 14), enable MSI-X (bit 15), then unmask,
	 * reading each write back to flush it. */
	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
	control = dev->msix_cap->message_control.as_uint16_t;
	control = control & ~(1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

static void
vmd_disable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	/* Mask the function (bit 14), then clear the MSI-X enable bit (bit 15) */
	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}
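
/*
 * Illustrative sketch: per the PCI spec, the magic bits above are bit 15
 * (MSI-X Enable) and bit 14 (Function Mask) of the MSI-X Message Control
 * register. With hypothetical named constants the sequence would read:
 *
 *	#define MSIX_FUNCTION_MASK (1u << 14)
 *	#define MSIX_ENABLE        (1u << 15)
 *
 *	control |= MSIX_FUNCTION_MASK;		// mask all vectors first
 *	control |= MSIX_ENABLE;			// then enable MSI-X
 *	control &= ~MSIX_FUNCTION_MASK;		// finally unmask
 */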

/*
 * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for
 * the port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
 */
static void
vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
{
	int entry;

	if (!dev || !vmdEntry || !dev->msix_cap) {
		return;
	}

	vmd_disable_msix(dev);
	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
		return;
	}

	for (entry = 0; entry < dev->msix_table_size; ++entry) {
		dev->msix_table[entry].vector_control = 1;
	}
	vmd_enable_msix(dev);
}

static void
vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
{
	/* Update the subordinate bus of all bridges above this bridge */
	volatile struct vmd_pci_device *dev = bridge;
	uint8_t subordinate_bus;

	if (!dev) {
		return;
	}
	subordinate_bus = bridge->header->one.subordinate;
	while (dev->parent_bridge != NULL) {
		dev = dev->parent_bridge;
		if (dev->header->one.subordinate < subordinate_bus) {
			dev->header->one.subordinate = subordinate_bus;
			subordinate_bus = dev->header->one.subordinate;
		}
	}
}

static bool
vmd_is_supported_device(struct vmd_pci_device *dev)
{
	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
}

static int
vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);

	*size = dev->bar[bar].size;
	*phys_addr = dev->bar[bar].start;
	*mapped_addr = (void *)dev->bar[bar].vaddr;

	return 0;
}

static int
vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
{
	return 0;
}

static int
vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
		 uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *src = (volatile uint8_t *)dev->header;
	uint8_t *dst = value;
	size_t i;

	if (len + offset > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[i] = src[offset + i];
	}

	return 0;
}

static int
vmd_dev_cfg_write(struct spdk_pci_device *_dev, void *value,
		  uint32_t len, uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
	uint8_t *src = value;
	size_t i;

	if ((len + offset) > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[offset + i] = src[i];
	}

	return 0;
}

static void
vmd_dev_detach(struct spdk_pci_device *dev)
{
	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
	struct vmd_pci_device *bus_device = vmd_device->bus->self;
	struct vmd_pci_bus *bus = vmd_device->bus;
	size_t i, num_bars = vmd_device->header_type ? 2 : 6;

	spdk_pci_unhook_device(dev);
	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);

	/* Release the hotplug region if the device is under a hotplug-capable bus */
	if (bus_device && bus_device->hotplug_capable) {
		for (i = 0; i < num_bars; ++i) {
			if (vmd_device->bar[i].start != 0) {
				vmd_hotplug_free_addr(&bus_device->hp, vmd_device->bar[i].start);
			}
		}
	}

	free(dev);
}

static void
vmd_dev_init(struct vmd_pci_device *dev)
{
	uint8_t bdf[32];

	dev->pci.addr.domain = dev->bus->vmd->domain;
	dev->pci.addr.bus = dev->bus->bus_number;
	dev->pci.addr.dev = dev->devfn;
	dev->pci.addr.func = 0;
	dev->pci.id.vendor_id = dev->header->common.vendor_id;
	dev->pci.id.device_id = dev->header->common.device_id;
	dev->pci.type = "vmd";
	dev->pci.map_bar = vmd_dev_map_bar;
	dev->pci.unmap_bar = vmd_dev_unmap_bar;
	dev->pci.cfg_read = vmd_dev_cfg_read;
	dev->pci.cfg_write = vmd_dev_cfg_write;
	dev->hotplug_capable = false;
	if (dev->pcie_cap != NULL) {
		dev->cached_slot_control = dev->pcie_cap->slot_control;
	}

	if (vmd_is_supported_device(dev)) {
		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
		SPDK_DEBUGLOG(vmd, "Initializing NVMe device at %s\n", bdf);
		dev->pci.parent = dev->bus->vmd->pci;
		spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
	}
}

/*
 * Scans a single bus for all devices attached and returns a count of
 * how many devices were found. In the VMD topology, it is assumed there are no
 * multi-function devices. Hence a bus (bridge) will not have a multi-function
 * device with both type 0 and type 1 headers.
 *
 * The other option for implementing this function is to make the bus an int and
 * create a new device PciBridge. PciBridge would inherit from PciDevice with extra fields,
 * sub/pri/sec bus. The input becomes PciPort, bus number and parent_bridge.
 *
 * The bus number is scanned and if a device is found, based on the header_type, create
 * either PciBridge(1) or PciDevice(0).
 *
 * If a PciBridge, assign bus numbers and rescan the new bus. The current PciBridge being
 * scanned becomes the passed-in parent_bridge with the new bus number.
 *
 * The linked list becomes a list of PciBridges with PciDevices attached.
 *
 * Returns a count of how many devices were found (type 1 + type 0 header devices).
 */
static uint8_t
vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge)
{
	/* assuming only single function devices are on the bus */
	struct vmd_pci_device *new_dev;
	struct vmd_adapter *vmd;
	union express_slot_capabilities_register slot_cap;
	struct vmd_pci_bus *new_bus;
	uint8_t device_number, dev_cnt = 0;
	uint8_t new_bus_num;

	for (device_number = 0; device_number < 32; device_number++) {
		new_dev = vmd_alloc_dev(bus, device_number);
		if (new_dev == NULL) {
			continue;
		}

		dev_cnt++;
		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
			slot_cap.as_uint32_t = 0;
			if (new_dev->pcie_cap != NULL) {
				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
			}

			new_bus_num = vmd_get_next_bus_number(bus->vmd->is_hotplug_scan ? new_dev : NULL, bus->vmd);
			if (new_bus_num == 0xff) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
			if (!new_bus) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus->primary_bus = bus->secondary_bus;
			new_bus->self = new_dev;
			new_dev->bus_object = new_bus;

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
				new_bus->subordinate_bus += new_bus->hotplug_buses;

				/* Attach the hot plug instance if HP is supported */
				/* Hot inserted SSDs can be assigned a port bus of subordinate + 1 */
				SPDK_DEBUGLOG(vmd, "hotplug_capable/slot_implemented = "
					      "%x:%x\n", slot_cap.bit_field.hotplug_capable,
					      new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
			}

			new_dev->parent_bridge = parent_bridge;
			new_dev->header->one.primary = new_bus->primary_bus;
			new_dev->header->one.secondary = new_bus->secondary_bus;
			new_dev->header->one.subordinate = new_bus->subordinate_bus;

			vmd_bus_update_bridge_info(new_dev);
			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);

			vmd_dev_init(new_dev);

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				vmd_init_hotplug(new_dev, new_bus);
			}

			dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
			if (new_dev->pcie_cap != NULL) {
				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
					return dev_cnt;
				}
			}
		} else {
			/* Attach the device to the current bus and assign base addresses */
			TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
			g_end_device_count++;
			if (vmd_assign_base_addrs(new_dev)) {
				vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
				vmd_dev_init(new_dev);
				if (vmd_is_supported_device(new_dev)) {
					vmd = bus->vmd;
					vmd->target[vmd->nvme_count] = new_dev;
					vmd->nvme_count++;
				}
			} else {
				SPDK_DEBUGLOG(vmd, "Removing failed device:%p\n", new_dev);
				TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
				free(new_dev);
				if (dev_cnt) {
					dev_cnt--;
				}
			}
		}
	}

	return dev_cnt;
}

static void
vmd_print_pci_info(struct vmd_pci_device *dev)
{
	if (!dev) {
		return;
	}

	if (dev->pcie_cap != NULL) {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
			     dev->header->common.vendor_id, dev->header->common.device_id,
			     dev->pcie_cap->express_cap_register.bit_field.device_type,
			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
	} else {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
			     dev->header->common.vendor_id, dev->header->common.device_id);
	}

	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);

	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
	}

	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
			}
		}
	}

	if (dev->sn_cap != NULL) {
		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;

		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
	}
}

static void
vmd_cache_scan_info(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	SPDK_DEBUGLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
		      dev->header->common.device_id);

	if (vmd_device_is_root_port(dev)) {
		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
		reg = dev->header->one.prefetch_base_upper;
		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
		reg = dev->header->one.prefetch_limit_upper;

		SPDK_DEBUGLOG(vmd, "prefetch: %x:%x\n",
			      dev->header->one.prefetch_base_upper,
			      dev->header->one.prefetch_limit_upper);
	}
}

static uint8_t
vmd_scan_pcibus(struct vmd_pci_bus *bus)
{
	struct vmd_pci_bus *bus_entry;
	struct vmd_pci_device *dev;
	uint8_t dev_cnt;

	g_end_device_count = 0;
	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
	bus->vmd->next_bus_number = bus->bus_number + 1;
	dev_cnt = vmd_scan_single_bus(bus, NULL);

	SPDK_DEBUGLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
	SPDK_DEBUGLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);

	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);

	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
		if (bus_entry->self != NULL) {
			vmd_print_pci_info(bus_entry->self);
			vmd_cache_scan_info(bus_entry->self);
		}

		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
			vmd_print_pci_info(dev);
		}
	}

	return dev_cnt;
}

static int
vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
{
	int rc;

	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
				     &vmd->cfgbar, &vmd->cfgbar_size);
	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
					     &vmd->membar, &vmd->membar_size);
	}

	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
					     &vmd->msixbar, &vmd->msixbar_size);
	}

	if (rc == 0) {
		vmd->physical_addr = vmd->membar;
		vmd->current_addr_size = vmd->membar_size;
	}
	return rc;
}

static void
vmd_set_starting_bus_number(struct vmd_adapter *vmd, uint8_t *bus_start,
			    uint8_t *max_bus)
{
	uint32_t vmd_cap = 0, vmd_config = 0;
	uint8_t bus_restrict_cap, bus_restrictions;

	spdk_pci_device_cfg_read32(vmd->pci, &vmd_cap, PCI_VMD_VMCAP);
	spdk_pci_device_cfg_read32(vmd->pci, &vmd_config, PCI_VMD_VMCONFIG);

	bus_restrict_cap = vmd_cap & 0x1; /* bit 0 */
	bus_restrictions = (vmd_config >> 8) & 0x3; /* bits 8-9 */
	if ((bus_restrict_cap == 0x1) && (bus_restrictions == 0x1)) {
		*bus_start = 128;
		*max_bus = 255;
	} else {
		*bus_start = 0;
		*max_bus = 127;
	}
}

static int
vmd_enumerate_devices(struct vmd_adapter *vmd)
{
	uint8_t max_bus, bus_start;

	vmd->vmd_bus.vmd = vmd;
	vmd->vmd_bus.domain = vmd->pci->addr.domain;

	if (vmd->pci->id.device_id == PCI_DEVICE_ID_INTEL_VMD_ICX) {
		vmd_set_starting_bus_number(vmd, &bus_start, &max_bus);
		vmd->vmd_bus.bus_start = bus_start;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = vmd->vmd_bus.bus_start;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = vmd->vmd_bus.bus_start;
		vmd->max_pci_bus = max_bus;
	} else {
		vmd->vmd_bus.bus_start = 0;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
		vmd->max_pci_bus = PCI_MAX_BUS_NUMBER;
	}

	return vmd_scan_pcibus(&vmd->vmd_bus);
}

struct vmd_pci_device *
vmd_find_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;
	int i;

	for (i = 0; i < MAX_VMD_TARGET; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			if (bus->self) {
				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
					return bus->self;
				}
			}

			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
					return dev;
				}
			}
		}
	}

	return NULL;
}

static int
vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	uint32_t cmd_reg = 0;
	char bdf[32] = {0};
	struct vmd_container *vmd_c = ctx;
	size_t i;

	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);

	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
	SPDK_DEBUGLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);

	/* map vmd bars */
	i = vmd_c->count;
	vmd_c->vmd[i].pci = pci_dev;
	vmd_c->vmd[i].vmd_index = i;
	vmd_c->vmd[i].domain =
		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
	TAILQ_INIT(&vmd_c->vmd[i].bus_list);

	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
		return -1;
	}

	SPDK_DEBUGLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
		      (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
		      (uint32_t)vmd_c->vmd[i].cfgbar_size);
	SPDK_DEBUGLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
		      (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
		      (uint32_t)vmd_c->vmd[i].membar_size);
	SPDK_DEBUGLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
		      (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
		      (uint32_t)vmd_c->vmd[i].msixbar_size);

	vmd_c->count = i + 1;

	vmd_enumerate_devices(&vmd_c->vmd[i]);

	return 0;
}

int
spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
{
	int cnt = 0;
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;

	if (!nvme_list) {
		return -1;
	}

	for (int i = 0; i < MAX_VMD_TARGET; ++i) {
		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
					nvme_list[cnt++] = dev->pci;
					if (!dev->is_hooked) {
						vmd_dev_init(dev);
						dev->is_hooked = 1;
					}
				}
			}
		}
	}

	return cnt;
}
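
/*
 * Usage sketch (illustrative; the array size and the example VMD address are
 * assumptions of the caller, which must size nvme_list for the expected
 * device count):
 *
 *	struct spdk_pci_device nvme_list[32];
 *	struct spdk_pci_addr vmd_addr;
 *	int cnt;
 *
 *	spdk_pci_addr_parse(&vmd_addr, "0000:5d:05.5");
 *	cnt = spdk_vmd_pci_device_list(vmd_addr, nvme_list);
 *	for (int j = 0; j < cnt; j++) {
 *		... attach an NVMe driver to nvme_list[j].addr ...
 *	}
 */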

static void
vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device = bus->self;
	uint16_t status __attribute__((unused));

	/* The status bits are RW1C: writing back the value that was read clears them */
	status = device->pcie_cap->slot_status.as_uint16_t;
	device->pcie_cap->slot_status.as_uint16_t = status;
	status = device->pcie_cap->slot_status.as_uint16_t;

	status = device->pcie_cap->link_status.as_uint16_t;
	device->pcie_cap->link_status.as_uint16_t = status;
	status = device->pcie_cap->link_status.as_uint16_t;
}

static void
vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
{
	uint8_t num_devices, sleep_count;

	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
		/* Scan until a new device is found */
		num_devices = vmd_scan_single_bus(bus, bus->self);
		if (num_devices > 0) {
			break;
		}

		spdk_delay_us(200000);
	}

	if (num_devices == 0) {
		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
	}
}

static void
vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device, *tmpdev;

	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
		if (!vmd_bus_device_present(bus, device->devfn)) {
			device->pci.internal.pending_removal = true;

			/* If the device isn't attached, remove it immediately */
			if (!device->pci.internal.attached) {
				vmd_dev_detach(&device->pci);
			}
		}
	}
}

int
spdk_vmd_hotplug_monitor(void)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *device;
	int num_hotplugs = 0;
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			device = bus->self;
			if (device == NULL || !device->hotplug_capable) {
				continue;
			}

			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
				continue;
			}

			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
				SPDK_DEBUGLOG(vmd, "Device hotplug detected on bus "
					      "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotplug(bus);
			} else {
				SPDK_DEBUGLOG(vmd, "Device hotremove detected on bus "
					      "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotremove(bus);
			}

			vmd_clear_hotplug_status(bus);
			num_hotplugs++;
		}
	}

	return num_hotplugs;
}
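
/*
 * Usage sketch (illustrative): an application typically polls this from a
 * timer, e.g. once per second, after spdk_vmd_init() has succeeded:
 *
 *	if (spdk_vmd_hotplug_monitor() > 0) {
 *		... re-enumerate NVMe devices behind the VMD ...
 *	}
 *
 * The return value is the number of hotplug events handled in this pass.
 */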

int
spdk_vmd_init(void)
{
	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
}

void
spdk_vmd_fini(void)
{
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
	}
}

SPDK_LOG_REGISTER_COMPONENT(vmd)