xref: /spdk/lib/vmd/vmd.c (revision 60982c759db49b4f4579f16e3b24df0725ba4b94)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2019 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "vmd_internal.h"
7 
8 #include "spdk/stdinc.h"
9 #include "spdk/string.h"
10 #include "spdk/likely.h"
11 
12 static const char *device_type[] = {
13 	"PCI Express Endpoint",
14 	"Legacy PCI Express Endpoint",
15 	"Reserved 1",
16 	"Reserved 2",
17 	"Root Port of PCI Express Root Complex",
18 	"Upstream Port of PCI Express Switch",
19 	"Downstream Port of PCI Express Switch",
20 	"PCI Express to PCI/PCI-X Bridge",
21 	"PCI/PCI-X to PCI Express Bridge",
22 	"Root Complex Integrated Endpoint",
23 	"Root Complex Event Collector",
24 	"Reserved Capability"
25 };
26 
27 /*
28  * Container for all VMD adapters probed in the system.
29  */
30 struct vmd_container {
31 	uint32_t count;
32 	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
33 };
34 
35 static struct vmd_container g_vmd_container;
36 static uint8_t g_end_device_count;
37 
38 static bool
39 vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
40 {
41 	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
42 	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
43 }
44 
45 static void
46 vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
47 {
48 	uint32_t pad;
49 
50 	/*
51 	 *  The device is not in the hot plug path; align the remaining base address from membar 1.
52 	 */
53 	if (vmd->physical_addr & (alignment - 1)) {
54 		pad = alignment - (vmd->physical_addr & (alignment - 1));
55 		vmd->physical_addr += pad;
56 		vmd->current_addr_size -= pad;
57 	}
58 }
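
/*
 * Worked example of the alignment math above (illustrative numbers only):
 * with physical_addr = 0x12345678 and alignment = ONE_MB (0x100000),
 * physical_addr & (alignment - 1) = 0x45678, so pad = 0xba988, the base
 * moves up to 0x12400000, and current_addr_size shrinks by the same pad.
 */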
59 
60 static bool
61 vmd_device_is_enumerated(volatile struct pci_header *header)
62 {
63 	return header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
64 	       header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
65 }
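
/*
 * Note: the prefetchable base/limit upper registers are repurposed here as
 * scratch space.  vmd_cache_scan_info() below writes VMD_UPPER_BASE_SIGNATURE
 * and VMD_UPPER_LIMIT_SIGNATURE into an enumerated root port, so a later
 * rescan can recognize an already-configured hierarchy.
 */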
66 
67 static bool
68 vmd_device_is_root_port(volatile struct pci_header *header)
69 {
70 	return header->common.vendor_id == SPDK_PCI_VID_INTEL &&
71 	       (header->common.device_id == PCI_ROOT_PORT_A_INTEL_SKX ||
72 		header->common.device_id == PCI_ROOT_PORT_B_INTEL_SKX ||
73 		header->common.device_id == PCI_ROOT_PORT_C_INTEL_SKX ||
74 		header->common.device_id == PCI_ROOT_PORT_D_INTEL_SKX ||
75 		header->common.device_id == PCI_ROOT_PORT_A_INTEL_ICX ||
76 		header->common.device_id == PCI_ROOT_PORT_B_INTEL_ICX ||
77 		header->common.device_id == PCI_ROOT_PORT_C_INTEL_ICX ||
78 		header->common.device_id == PCI_ROOT_PORT_D_INTEL_ICX);
79 }
80 
81 static void
82 vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
83 {
84 	struct pci_mem_mgr *region, *prev;
85 
86 	do {
87 		prev = NULL;
88 		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
89 			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
90 				break;
91 			}
92 
93 			prev = region;
94 		}
95 
96 		if (region != NULL) {
97 			prev->size += region->size;
98 			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
99 			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
100 		}
101 	} while (region != NULL);
102 }
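
/*
 * Example of the coalescing pass above (illustrative values): a free list
 * holding {addr = 0x1000, size = 0x100} followed by {addr = 0x1100,
 * size = 0x200} merges into a single {addr = 0x1000, size = 0x300} region,
 * and the emptied descriptor is parked on unused_mem_queue for reuse.
 */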
103 
104 static void
105 vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
106 {
107 	struct pci_mem_mgr *current, *prev = NULL;
108 
109 	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);
110 
111 	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
112 		if (current->addr > region->addr) {
113 			break;
114 		}
115 
116 		prev = current;
117 	}
118 
119 	if (prev != NULL) {
120 		assert(prev->addr + prev->size <= region->addr);
121 		assert(current == NULL || (region->addr + region->size <= current->addr));
122 		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
123 	} else {
124 		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
125 	}
126 
127 	vmd_hotplug_coalesce_regions(hp);
128 }
129 
130 static void
131 vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
132 {
133 	struct pci_mem_mgr *region;
134 
135 	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
136 		if (region->addr == addr) {
137 			break;
138 		}
139 	}
140 
141 	assert(region != NULL);
142 	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);
143 
144 	vmd_hotplug_free_region(hp, region);
145 }
146 
147 static uint64_t
148 vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
149 {
150 	struct pci_mem_mgr *region = NULL, *free_region;
151 
152 	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
153 		if (region->size >= size) {
154 			break;
155 		}
156 	}
157 
158 	if (region == NULL) {
159 		SPDK_INFOLOG(vmd, "Unable to find a free hotplug memory region of size: "
160 			     "0x%"PRIx32"\n", size);
161 		return 0;
162 	}
163 
164 	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
165 	if (size < region->size) {
166 		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
167 		if (free_region == NULL) {
168 			SPDK_INFOLOG(vmd, "Unable to find unused descriptor to store the "
169 				     "free region of size: %"PRIu32"\n", region->size - size);
170 		} else {
171 			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
172 			free_region->size = region->size - size;
173 			free_region->addr = region->addr + size;
174 			region->size = size;
175 			vmd_hotplug_free_region(hp, free_region);
176 		}
177 	}
178 
179 	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);
180 
181 	return region->addr;
182 }
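
/*
 * First-fit example for the allocator above (illustrative values): a request
 * of size 0x4000 against a free region {addr = 0x8000, size = 0x10000}
 * shrinks that region to {addr = 0x8000, size = 0x4000}, moves it to
 * alloc_mem_queue, returns the remainder {addr = 0xc000, size = 0xc000} to
 * the free list through a spare descriptor, and hands 0x8000 to the caller.
 */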
183 
184 /*
185  *  Allocates an address from the vmd membar for the input memory size.
186  *  vmd - vmd adapter object
187  *  dev - vmd_pci_device to allocate a base address for.
188  *  size - size of the memory window requested.
189  *  Size must be a power of 2. Addresses are returned on the size boundary.
190  *  Returns a physical address within the VMD membar window, or 0x0 if a window cannot be allocated.
191  *  Consider increasing the size of the vmd membar if 0x0 is returned.
192  */
193 static uint64_t
194 vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
195 {
196 	uint64_t base_address = 0, padding = 0;
197 	struct vmd_pci_bus *hp_bus;
198 
199 	if (size && ((size & (~size + 1)) != size)) {
200 		return base_address;
201 	}
202 
203 	/*
204 	 *  If the device is downstream of a hot plug port, allocate the address from the
205 	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to
206 	 *  determine whether a free range exists that satisfies the input request.  If a free
207 	 *  range cannot be found, get a buffer from the unused chunk. A first-fit algorithm is used.
208 	 */
209 	if (dev) {
210 		hp_bus = dev->parent;
211 		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
212 			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
213 		}
214 	}
215 
216 	/* Ensure physical membar allocated is size aligned */
217 	if (vmd->physical_addr & (size - 1)) {
218 		padding = size - (vmd->physical_addr & (size - 1));
219 	}
220 
221 	/* Allocate from membar if enough memory is left */
222 	if (vmd->current_addr_size >= size + padding) {
223 		base_address = vmd->physical_addr + padding;
224 		vmd->physical_addr += size + padding;
225 		vmd->current_addr_size -= size + padding;
226 	}
227 
228 	SPDK_INFOLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);
229 
230 	return base_address;
231 }
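
/*
 * The (size & (~size + 1)) != size check above rejects non-power-of-2 sizes:
 * ~size + 1 is -size, and size & -size isolates the lowest set bit, which
 * equals size only for powers of 2.  For example, size = 0x6000 yields
 * 0x2000 != 0x6000 (rejected), while size = 0x4000 yields 0x4000 (accepted).
 */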
232 
233 static bool
234 vmd_is_end_device(struct vmd_pci_device *dev)
235 {
236 	return (dev && dev->header) &&
237 	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
238 }
239 
240 static void
241 vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
242 {
243 	struct vmd_pci_bus *bus;
244 	struct vmd_pci_device *bridge;
245 
246 	if (base == 0 || limit == 0) {
247 		return;
248 	}
249 
250 	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
251 		bus = dev->bus_object;
252 	} else {
253 		bus = dev->parent;
254 	}
255 
256 	bridge = bus->self;
257 	SPDK_INFOLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
258 		     bridge->header->one.mem_limit);
259 
260 	if (dev->bus->vmd->scan_completed) {
261 		return;
262 	}
263 
264 	while (bus && bus->self != NULL) {
265 		bridge = bus->self;
266 
267 		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
268 		if (bridge->header->one.mem_base > base) {
269 			bridge->header->one.mem_base = base;
270 			base = bridge->header->one.mem_base;
271 		}
272 
273 		if (bridge->header->one.mem_limit < limit) {
274 			bridge->header->one.mem_limit = limit;
275 			limit = bridge->header->one.mem_limit;
276 		}
277 
278 		bus = bus->parent;
279 	}
280 }
281 
282 static uint64_t
283 vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
284 {
285 	struct vmd_pci_bus *bus = dev->parent;
286 
287 	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
288 		return dev->header->zero.BAR[index] & ~0xf;
289 	} else {
290 		if (bus->self->hotplug_capable) {
291 			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
292 		} else {
293 			return (uint64_t)bus->self->header->one.mem_base << 16;
294 		}
295 	}
296 }
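
/*
 * The mem_base << 16 above recovers a 32-bit address from the type-1 header's
 * 16-bit memory base register, whose upper 12 bits hold address bits 31:20.
 * For example, mem_base = 0xd000 corresponds to a window base of 0xd0000000.
 */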
297 
298 static bool
299 vmd_assign_base_addrs(struct vmd_pci_device *dev)
300 {
301 	uint16_t mem_base = 0, mem_limit = 0;
302 	unsigned char mem_attr = 0;
303 	int last;
304 	struct vmd_adapter *vmd = NULL;
305 	bool ret_val = false;
306 	uint32_t bar_value;
307 	uint32_t table_offset;
308 
309 	if (dev && dev->bus) {
310 		vmd = dev->bus->vmd;
311 	}
312 
313 	if (!vmd) {
314 		return false;
315 	}
316 
317 	vmd_align_base_addrs(vmd, ONE_MB);
318 
319 	last = dev->header_type ? 2 : 6;
320 	for (int i = 0; i < last; i++) {
321 		bar_value = dev->header->zero.BAR[i];
322 		dev->header->zero.BAR[i] = ~(0U);
323 		dev->bar[i].size = dev->header->zero.BAR[i];
324 		dev->header->zero.BAR[i] = bar_value;
325 
326 		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0 ||
327 		    dev->header->zero.BAR[i] & 1) {
328 			dev->bar[i].size = 0;
329 			continue;
330 		}
331 		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
332 		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);
333 
334 		if (vmd->scan_completed) {
335 			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
336 		} else {
337 			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
338 		}
339 
340 		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;
341 
342 		if (!dev->bar[i].start) {
343 			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
344 				i++;
345 			}
346 			continue;
347 		}
348 
349 		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
350 		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
351 			    BRIDGE_BASEREG(dev->bar[i].size - 1);
352 		if (!mem_base) {
353 			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
354 		}
355 
356 		ret_val = true;
357 
358 		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
359 			i++;
360 			if (i < last) {
361 				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
362 			}
363 		}
364 	}
365 
366 	/* Enable device MEM and bus mastering */
367 	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
368 	/*
369 	 * Writes to the PCI config space are posted writes. To ensure a transaction reaches its
370 	 * destination before another write is posted, an immediate read of the written value should be performed.
371 	 */
372 	{ uint16_t cmd = dev->header->zero.command; (void)cmd; }
373 
374 	if (dev->msix_cap && ret_val) {
375 		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
376 		if (dev->bar[table_offset & 0x3].vaddr) {
377 			dev->msix_table = (volatile struct pci_msix_table_entry *)
378 					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
379 		}
380 	}
381 
382 	if (ret_val && vmd_is_end_device(dev)) {
383 		vmd_update_base_limit_register(dev, mem_base, mem_limit);
384 	}
385 
386 	return ret_val;
387 }
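
/*
 * A minimal sketch of the BAR sizing handshake performed above, kept out of
 * the build; `bar_reg` is a hypothetical pointer to a 32-bit memory BAR and
 * is not part of this file.
 */
#if 0
static uint32_t
vmd_bar_size_sketch(volatile uint32_t *bar_reg)
{
	uint32_t saved = *bar_reg;	/* remember the programmed base */
	uint32_t mask;

	*bar_reg = ~0U;			/* write all 1s to the BAR... */
	mask = *bar_reg;		/* ...and read back the size mask */
	*bar_reg = saved;		/* restore the original value */

	/* drop the attribute bits; two's complement of the mask is the size */
	return TWOS_COMPLEMENT(mask & PCI_BASE_ADDR_MASK);
}
#endif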
388 
389 static void
390 vmd_get_device_capabilities(struct vmd_pci_device *dev)
392 {
393 	volatile uint8_t *config_space;
394 	uint8_t capabilities_offset;
395 	struct pci_capabilities_header *capabilities_hdr;
396 
397 	config_space = (volatile uint8_t *)dev->header;
398 	if ((dev->header->common.status & PCI_CAPABILITIES_LIST) == 0) {
399 		return;
400 	}
401 
402 	capabilities_offset = dev->header->zero.cap_pointer;
403 	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
404 		capabilities_offset = dev->header->one.cap_pointer;
405 	}
406 
407 	while (capabilities_offset > 0) {
408 		capabilities_hdr = (struct pci_capabilities_header *)
409 				   &config_space[capabilities_offset];
410 		switch (capabilities_hdr->capability_id) {
411 		case CAPABILITY_ID_PCI_EXPRESS:
412 			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
413 			break;
414 
415 		case CAPABILITY_ID_MSI:
416 			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
417 			break;
418 
419 		case CAPABILITY_ID_MSIX:
420 			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
421 			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
422 			break;
423 
424 		default:
425 			break;
426 		}
427 		capabilities_offset = capabilities_hdr->next;
428 	}
429 }
430 
431 static volatile struct pci_enhanced_capability_header *
432 vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
433 {
434 	uint8_t *data;
435 	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
436 	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;
437 
438 	data = (uint8_t *)dev->header;
439 	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
440 		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
441 		if (cap_hdr->capability_id == capability_id) {
442 			return cap_hdr;
443 		}
444 		cap_offset = cap_hdr->next;
445 		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
446 			break;
447 		}
448 	}
449 
450 	return NULL;
451 }
452 
453 static void
454 vmd_read_config_space(struct vmd_pci_device *dev)
455 {
456 	/*
457 	 * Writes to the PCI config space are posted writes. To ensure a transaction reaches its
458 	 * destination before another write is posted, an immediate read of the written value should be performed.
459 	 */
460 	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
461 	{ uint16_t cmd = dev->header->common.command; (void)cmd; }
462 
463 	vmd_get_device_capabilities(dev);
464 	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
465 			DEVICE_SERIAL_NUMBER_CAP_ID);
466 }
467 
468 static void
469 vmd_update_scan_info(struct vmd_pci_device *dev)
470 {
471 	struct vmd_adapter *vmd_adapter = dev->bus->vmd;
472 
473 	if (vmd_adapter->root_port_updated) {
474 		return;
475 	}
476 
477 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
478 		return;
479 	}
480 
481 	if (vmd_device_is_root_port(dev->header)) {
482 		vmd_adapter->root_port_updated = 1;
483 		SPDK_INFOLOG(vmd, "root_port_updated = %d\n",
484 			     vmd_adapter->root_port_updated);
485 		SPDK_INFOLOG(vmd, "upper:limit = %x : %x\n",
486 			     dev->header->one.prefetch_base_upper,
487 			     dev->header->one.prefetch_limit_upper);
488 		if (vmd_device_is_enumerated(dev->header)) {
489 			vmd_adapter->scan_completed = 1;
490 			SPDK_INFOLOG(vmd, "scan_completed = %d\n",
491 				     vmd_adapter->scan_completed);
492 		}
493 	}
494 }
495 
496 static void
497 vmd_reset_base_limit_registers(volatile struct pci_header *header)
498 {
499 	uint32_t reg __attribute__((unused));
500 
501 	/*
502 	 * Writes to the pci config space are posted writes.
503 	 * To ensure transaction reaches its destination
504 	 * before another write is posted, an immediate read
505 	 * of the written value should be performed.
506 	 */
507 	header->one.mem_base = 0xfff0;
508 	reg = header->one.mem_base;
509 	header->one.mem_limit = 0x0;
510 	reg = header->one.mem_limit;
511 	header->one.prefetch_base = 0x0;
512 	reg = header->one.prefetch_base;
513 	header->one.prefetch_limit = 0x0;
514 	reg = header->one.prefetch_limit;
515 	header->one.prefetch_base_upper = 0x0;
516 	reg = header->one.prefetch_base_upper;
517 	header->one.prefetch_limit_upper = 0x0;
518 	reg = header->one.prefetch_limit_upper;
519 	header->one.io_base_upper = 0x0;
520 	reg = header->one.io_base_upper;
521 	header->one.io_limit_upper = 0x0;
522 	reg = header->one.io_limit_upper;
523 	header->one.primary = 0;
524 	reg = header->one.primary;
525 	header->one.secondary = 0;
526 	reg = header->one.secondary;
527 	header->one.subordinate = 0;
528 	reg = header->one.subordinate;
529 }
530 
531 static void
532 vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
533 {
534 	struct vmd_adapter *vmd = bus->vmd;
535 	struct vmd_hot_plug *hp = &dev->hp;
536 	size_t mem_id;
537 
538 	dev->hotplug_capable = true;
539 	hp->bar.size = 1 << 20;
540 
541 	if (!vmd->scan_completed) {
542 		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
543 		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
544 		bus->self->header->one.mem_limit =
545 			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
546 	} else {
547 		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
548 	}
549 
550 	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);
551 
552 	TAILQ_INIT(&hp->free_mem_queue);
553 	TAILQ_INIT(&hp->unused_mem_queue);
554 	TAILQ_INIT(&hp->alloc_mem_queue);
555 
556 	hp->mem[0].size = hp->bar.size;
557 	hp->mem[0].addr = hp->bar.start;
558 
559 	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);
560 
561 	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
562 		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
563 	}
564 
565 	SPDK_INFOLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
566 		     bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
567 }
568 
569 static bool
570 vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
571 {
572 	volatile struct pci_header *header;
573 
574 	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
575 						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
576 	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
577 		return false;
578 	}
579 
580 	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
581 		return false;
582 	}
583 
584 	return true;
585 }
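
/*
 * CONFIG_OFFSET_ADDR() computes an ECAM-style offset into the VMD CFGBAR
 * (assuming the standard bus << 20 | device << 15 | function << 12 layout),
 * so reading the vendor ID above is an ordinary load from the mapped window.
 */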
586 
587 static struct vmd_pci_device *
588 vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
589 {
590 	struct vmd_pci_device *dev = NULL;
591 	struct pci_header volatile *header;
592 	uint8_t header_type;
593 	uint32_t rev_class;
594 
595 	/* Make sure we're not creating two devices on the same dev/fn */
596 	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
597 		if (dev->devfn == devfn) {
598 			return NULL;
599 		}
600 	}
601 
602 	if (!vmd_bus_device_present(bus, devfn)) {
603 		return NULL;
604 	}
605 
606 	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
607 						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
608 
609 	SPDK_INFOLOG(vmd, "PCI device found: %04x:%04x ***\n",
610 		     header->common.vendor_id, header->common.device_id);
611 
612 	dev = calloc(1, sizeof(*dev));
613 	if (!dev) {
614 		return NULL;
615 	}
616 
617 	dev->header = header;
618 	dev->vid = dev->header->common.vendor_id;
619 	dev->did = dev->header->common.device_id;
620 	dev->bus = bus;
621 	dev->parent = bus;
622 	dev->devfn = devfn;
623 	header_type = dev->header->common.header_type;
624 	rev_class = dev->header->common.rev_class;
625 	dev->class = rev_class >> 8;
626 	dev->header_type = header_type & 0x7;
627 
628 	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
629 		vmd_update_scan_info(dev);
630 		if (!dev->bus->vmd->scan_completed) {
631 			vmd_reset_base_limit_registers(dev->header);
632 		}
633 	}
634 
635 	vmd_read_config_space(dev);
636 
637 	return dev;
638 }
639 
640 static struct vmd_pci_bus *
641 vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
642 {
643 	struct vmd_pci_bus *new_bus;
644 
645 	new_bus = calloc(1, sizeof(*new_bus));
646 	if (!new_bus) {
647 		return NULL;
648 	}
649 
650 	new_bus->parent = parent;
651 	new_bus->domain = parent->domain;
652 	new_bus->bus_number = bus_number;
653 	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
654 	new_bus->self = bridge;
655 	new_bus->vmd = parent->vmd;
656 	new_bus->config_bus_number = new_bus->bus_number - new_bus->vmd->vmd_bus.bus_start;
657 	TAILQ_INIT(&new_bus->dev_list);
658 
659 	bridge->subordinate = new_bus;
660 
661 	bridge->pci.addr.bus = new_bus->bus_number;
662 	bridge->pci.addr.dev = bridge->devfn;
663 	bridge->pci.addr.func = 0;
664 	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;
665 
666 	return new_bus;
667 }
668 
669 static uint8_t
670 vmd_get_next_bus_number(struct vmd_adapter *vmd)
671 {
672 	uint8_t bus = 0xff;
673 
674 	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
675 		bus = vmd->next_bus_number;
676 		vmd->next_bus_number++;
677 	}
678 
679 	return bus;
680 }
681 
682 static uint8_t
683 vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
684 {
685 	uint8_t bus_number = 0xff;
686 
687 	if (dev && dev->bus && dev->bus->vmd &&
688 	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
689 		bus_number = RESERVED_HOTPLUG_BUSES;
690 		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
691 	}
692 
693 	return bus_number;
694 }
695 
696 static void
697 vmd_enable_msix(struct vmd_pci_device *dev)
698 {
699 	volatile uint16_t control;
700 
701 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
702 	dev->msix_cap->message_control.as_uint16_t = control;
703 	control = dev->msix_cap->message_control.as_uint16_t;
704 	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
705 	control = dev->msix_cap->message_control.as_uint16_t;
706 	control = control & ~(1 << 14);
707 	dev->msix_cap->message_control.as_uint16_t = control;
708 	control = dev->msix_cap->message_control.as_uint16_t;
709 }
710 
711 static void
712 vmd_disable_msix(struct vmd_pci_device *dev)
713 {
714 	volatile uint16_t control;
715 
716 	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
717 	dev->msix_cap->message_control.as_uint16_t = control;
718 	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
719 	dev->msix_cap->message_control.as_uint16_t = control;
720 	control = dev->msix_cap->message_control.as_uint16_t;
721 }
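
/*
 * For reference, in the MSI-X message control word bit 15 is MSI-X Enable and
 * bit 14 is Function Mask.  The enable path above therefore masks all vectors
 * first, flips the global enable, and only then clears the mask, reading the
 * register back after each write to flush the posted write.
 */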
722 
723 /*
724  * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for the
725  * port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
726  */
727 static void
728 vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
729 {
730 	int entry;
731 
732 	if (!dev || !vmdEntry || !dev->msix_cap) {
733 		return;
734 	}
735 
736 	vmd_disable_msix(dev);
737 	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
738 		return;
739 	}
740 
741 	for (entry = 0; entry < dev->msix_table_size; ++entry) {
742 		dev->msix_table[entry].vector_control = 1;
743 	}
744 	vmd_enable_msix(dev);
745 }
746 
747 static void
748 vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
749 {
750 	/* Update the subordinate bus of all bridges above this bridge */
751 	volatile struct vmd_pci_device *dev = bridge;
752 	uint8_t subordinate_bus;
753 
754 	if (!dev) {
755 		return;
756 	}
757 	subordinate_bus = bridge->header->one.subordinate;
758 	while (dev->parent_bridge != NULL) {
759 		dev = dev->parent_bridge;
760 		if (dev->header->one.subordinate < subordinate_bus) {
761 			dev->header->one.subordinate = subordinate_bus;
762 			subordinate_bus = dev->header->one.subordinate;
763 		}
764 	}
765 }
766 
767 static bool
768 vmd_is_supported_device(struct vmd_pci_device *dev)
769 {
770 	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
771 }
772 
773 static int
774 vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
775 		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
776 {
777 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);
778 
779 	*size = dev->bar[bar].size;
780 	*phys_addr = dev->bar[bar].start;
781 	*mapped_addr = (void *)dev->bar[bar].vaddr;
782 
783 	return 0;
784 }
785 
786 static int
787 vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
788 {
789 	return 0;
790 }
791 
792 static int
793 vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
794 		 uint32_t offset)
795 {
796 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
797 	volatile uint8_t *src = (volatile uint8_t *)dev->header;
798 	uint8_t *dst = value;
799 	size_t i;
800 
801 	if (len + offset > PCI_MAX_CFG_SIZE) {
802 		return -1;
803 	}
804 
805 	for (i = 0; i < len; ++i) {
806 		dst[i] = src[offset + i];
807 	}
808 
809 	return 0;
810 }
811 
812 static int
813 vmd_dev_cfg_write(struct spdk_pci_device *_dev, void *value,
814 		  uint32_t len, uint32_t offset)
815 {
816 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
817 	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
818 	uint8_t *src = value;
819 	size_t i;
820 
821 	if ((len + offset) > PCI_MAX_CFG_SIZE) {
822 		return -1;
823 	}
824 
825 	for (i = 0; i < len; ++i) {
826 		dst[offset + i] = src[i];
827 	}
828 
829 	return 0;
830 }
831 
832 static void
833 vmd_dev_free(struct vmd_pci_device *dev)
834 {
835 	struct vmd_pci_device *bus_device = dev->bus->self;
836 	size_t i, num_bars = dev->header_type ? 2 : 6;
837 
838 	/* Release the hotplug region if the device is under a hotplug-capable bus */
839 	if (bus_device && bus_device->hotplug_capable) {
840 		for (i = 0; i < num_bars; ++i) {
841 			if (dev->bar[i].start != 0) {
842 				vmd_hotplug_free_addr(&bus_device->hp, dev->bar[i].start);
843 			}
844 		}
845 	}
846 
847 	free(dev);
848 }
849 
850 static void
851 vmd_dev_detach(struct spdk_pci_device *dev)
852 {
853 	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
854 	struct vmd_pci_bus *bus = vmd_device->bus;
855 
856 	spdk_pci_unhook_device(dev);
857 	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);
858 
859 	vmd_dev_free(vmd_device);
860 }
861 
862 static void
863 vmd_dev_init(struct vmd_pci_device *dev)
864 {
865 	dev->pci.addr.domain = dev->bus->vmd->domain;
866 	dev->pci.addr.bus = dev->bus->bus_number;
867 	dev->pci.addr.dev = dev->devfn;
868 	dev->pci.addr.func = 0;
869 	dev->pci.socket_id = spdk_pci_device_get_socket_id(dev->bus->vmd->pci);
870 	dev->pci.id.vendor_id = dev->header->common.vendor_id;
871 	dev->pci.id.device_id = dev->header->common.device_id;
872 	dev->pci.type = "vmd";
873 	dev->pci.map_bar = vmd_dev_map_bar;
874 	dev->pci.unmap_bar = vmd_dev_unmap_bar;
875 	dev->pci.cfg_read = vmd_dev_cfg_read;
876 	dev->pci.cfg_write = vmd_dev_cfg_write;
877 	dev->hotplug_capable = false;
878 	if (dev->pcie_cap != NULL) {
879 		dev->cached_slot_control = dev->pcie_cap->slot_control;
880 	}
881 }
882 
883 static int
884 vmd_init_end_device(struct vmd_pci_device *dev)
885 {
886 	struct vmd_pci_bus *bus = dev->bus;
887 	struct vmd_adapter *vmd;
888 	struct spdk_pci_driver *driver;
889 	char bdf[32];
890 	int rc;
891 
892 	if (!vmd_assign_base_addrs(dev)) {
893 		SPDK_ERRLOG("Failed to allocate BARs for device: %p\n", dev);
894 		return -1;
895 	}
896 
897 	vmd_setup_msix(dev, &bus->vmd->msix_table[0]);
898 	vmd_dev_init(dev);
899 
900 	if (vmd_is_supported_device(dev)) {
901 		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
902 		SPDK_INFOLOG(vmd, "Initializing NVMe device at %s\n", bdf);
903 		dev->pci.parent = dev->bus->vmd->pci;
904 
905 		driver = spdk_pci_nvme_get_driver();
906 		assert(driver != NULL);
907 		rc = spdk_pci_hook_device(driver, &dev->pci);
908 		if (rc != 0) {
909 			SPDK_ERRLOG("Failed to hook device %s: %s\n", bdf, spdk_strerror(-rc));
910 			return -1;
911 		}
912 
913 		vmd = bus->vmd;
914 		vmd->target[vmd->nvme_count] = dev;
915 		vmd->nvme_count++;
916 	}
917 
918 	/* Attach the device to the current bus */
919 	TAILQ_INSERT_TAIL(&bus->dev_list, dev, tailq);
920 	g_end_device_count++;
921 
922 	return 0;
923 }
924 
925 /*
926  * Scans a single bus for all attached devices and returns a count of
927  * how many devices were found. In the VMD topology, it is assumed there are no multi-
928  * function devices. Hence a bus (bridge) will not have multi-function devices with both
929  * type 0 and type 1 headers.
930  *
931  * The other option for implementing this function is that the bus is an int and
932  * a new device PciBridge is created. PciBridge would inherit from PciDevice with extra
933  * fields: sub/pri/sec bus. The input becomes PciPort, bus number and parent_bridge.
934  *
935  * The bus number is scanned and, if a device is found, either a PciBridge (type 1)
936  * or a PciDevice (type 0) is created based on the header_type.
937  *
938  * For a PciBridge, bus numbers are assigned and the new bus is rescanned. The PciBridge
939  * currently being scanned becomes the passed-in parent_bridge with the new bus number.
940  *
941  * The linked list becomes a list of PciBridges with PciDevices attached.
942  *
943  * Returns a count of how many devices were found (type 1 + type 0 header devices).
944  */
945 static uint8_t
946 vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge, bool hotplug)
947 {
948 	/* assuming only single-function devices are on the bus */
949 	struct vmd_pci_device *new_dev;
950 	union express_slot_capabilities_register slot_cap;
951 	struct vmd_pci_bus *new_bus;
952 	uint8_t device_number, dev_cnt = 0;
953 	uint8_t new_bus_num;
954 	int rc;
955 
956 	for (device_number = 0; device_number < 32; device_number++) {
957 		new_dev = vmd_alloc_dev(bus, device_number);
958 		if (new_dev == NULL) {
959 			continue;
960 		}
961 
962 		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
963 			if (hotplug) {
964 				free(new_dev);
965 				continue;
966 			}
967 
968 			slot_cap.as_uint32_t = 0;
969 			if (new_dev->pcie_cap != NULL) {
970 				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
971 			}
972 
973 			new_bus_num = vmd_get_next_bus_number(bus->vmd);
974 			if (new_bus_num == 0xff) {
975 				vmd_dev_free(new_dev);
976 				return dev_cnt;
977 			}
978 			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
979 			if (!new_bus) {
980 				vmd_dev_free(new_dev);
981 				return dev_cnt;
982 			}
983 			new_bus->primary_bus = bus->secondary_bus;
984 			new_bus->self = new_dev;
985 			new_dev->bus_object = new_bus;
986 
987 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
988 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
989 				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
990 				new_bus->subordinate_bus += new_bus->hotplug_buses;
991 
992 				/* Attach hot plug instance if HP is supported */
993 			/* Hot-inserted SSDs can be assigned the port bus of subordinate + 1 */
994 				SPDK_INFOLOG(vmd, "hotplug_capable/slot_implemented = "
995 					     "%x:%x\n", slot_cap.bit_field.hotplug_capable,
996 					     new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
997 			}
998 
999 			new_dev->parent_bridge = parent_bridge;
1000 			new_dev->header->one.primary = new_bus->primary_bus;
1001 			new_dev->header->one.secondary = new_bus->secondary_bus;
1002 			new_dev->header->one.subordinate = new_bus->subordinate_bus;
1003 
1004 			vmd_bus_update_bridge_info(new_dev);
1005 			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);
1006 
1007 			vmd_dev_init(new_dev);
1008 			dev_cnt++;
1009 
1010 			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
1011 			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1012 				vmd_init_hotplug(new_dev, new_bus);
1013 			}
1014 
1015 			dev_cnt += vmd_scan_single_bus(new_bus, new_dev, hotplug);
1016 			if (new_dev->pcie_cap != NULL) {
1017 				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
1018 					return dev_cnt;
1019 				}
1020 			}
1021 		} else {
1022 			rc = vmd_init_end_device(new_dev);
1023 			if (rc != 0) {
1024 				vmd_dev_free(new_dev);
1025 			} else {
1026 				dev_cnt++;
1027 			}
1028 		}
1029 	}
1030 
1031 	return dev_cnt;
1032 }
1033 
1034 static void
1035 vmd_print_pci_info(struct vmd_pci_device *dev)
1036 {
1037 	if (!dev) {
1038 		return;
1039 	}
1040 
1041 	if (dev->pcie_cap != NULL) {
1042 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
1043 			     dev->header->common.vendor_id, dev->header->common.device_id,
1044 			     dev->pcie_cap->express_cap_register.bit_field.device_type,
1045 			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
1046 	} else {
1047 		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
1048 			     dev->header->common.vendor_id, dev->header->common.device_id);
1049 	}
1050 
1051 	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
1052 		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);
1053 
1054 	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
1055 		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
1056 			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
1057 	}
1058 
1059 	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
1060 		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
1061 			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
1062 		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
1063 			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
1064 			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
1065 				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
1066 			}
1067 		}
1068 	}
1069 
1070 	if (dev->sn_cap != NULL) {
1071 		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
1072 		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;
1073 
1074 		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
1075 			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
1076 	}
1077 }
1078 
1079 static void
1080 vmd_cache_scan_info(struct vmd_pci_device *dev)
1081 {
1082 	uint32_t reg __attribute__((unused));
1083 
1084 	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
1085 		return;
1086 	}
1087 
1088 	SPDK_INFOLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
1089 		     dev->header->common.device_id);
1090 
1091 	if (vmd_device_is_root_port(dev->header)) {
1092 		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
1093 		reg = dev->header->one.prefetch_base_upper;
1094 		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
1095 		reg = dev->header->one.prefetch_limit_upper;
1096 
1097 		SPDK_INFOLOG(vmd, "prefetch: %x:%x\n",
1098 			     dev->header->one.prefetch_base_upper,
1099 			     dev->header->one.prefetch_limit_upper);
1100 	}
1101 }
1102 
1103 static void
1104 vmd_reset_root_ports(struct vmd_pci_bus *bus)
1105 {
1106 	volatile struct pci_header *header;
1107 	uint32_t devfn;
1108 
1109 	/*
1110 	 * The root ports might have been configured by some other driver (e.g. the Linux kernel)
1111 	 * prior to loading the SPDK one, so we need to clear their configuration. We need to do this
1112 	 * before the scanning process: since it's depth-first, when scanning the initial root ports,
1113 	 * the latter ones might still be using stale configuration.  This can lead to two bridges
1114 	 * having the same secondary/subordinate bus configuration, which, of course, isn't correct.
1115 	 * (Note: this fixed issue #2413.)
1116 	 */
1117 	for (devfn = 0; devfn < 32; ++devfn) {
1118 		if (!vmd_bus_device_present(bus, devfn)) {
1119 			continue;
1120 		}
1121 
1122 		header = (volatile void *)(bus->vmd->cfg_vaddr +
1123 					   CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
1124 		if (vmd_device_is_root_port(header) && !vmd_device_is_enumerated(header)) {
1125 			vmd_reset_base_limit_registers(header);
1126 		}
1127 	}
1128 }
1129 
1130 static uint8_t
1131 vmd_scan_pcibus(struct vmd_pci_bus *bus)
1132 {
1133 	struct vmd_pci_bus *bus_entry;
1134 	struct vmd_pci_device *dev;
1135 	uint8_t dev_cnt;
1136 
1137 	vmd_reset_root_ports(bus);
1138 
1139 	g_end_device_count = 0;
1140 	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
1141 	bus->vmd->next_bus_number = bus->bus_number + 1;
1142 	dev_cnt = vmd_scan_single_bus(bus, NULL, false);
1143 
1144 	SPDK_INFOLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
1145 	SPDK_INFOLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);
1146 
1147 	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
1148 		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
1149 		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);
1150 
1151 	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
1152 		if (bus_entry->self != NULL) {
1153 			vmd_print_pci_info(bus_entry->self);
1154 			vmd_cache_scan_info(bus_entry->self);
1155 		}
1156 
1157 		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
1158 			vmd_print_pci_info(dev);
1159 		}
1160 	}
1161 
1162 	return dev_cnt;
1163 }
1164 
1165 static int
1166 vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
1167 {
1168 	int rc;
1169 
1170 	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
1171 				     &vmd->cfgbar, &vmd->cfgbar_size);
1172 	if (rc == 0) {
1173 		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
1174 					     &vmd->membar, &vmd->membar_size);
1175 	}
1176 
1177 	if (rc == 0) {
1178 		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
1179 					     &vmd->msixbar, &vmd->msixbar_size);
1180 	}
1181 
1182 	if (rc == 0) {
1183 		vmd->physical_addr = vmd->membar;
1184 		vmd->current_addr_size = vmd->membar_size;
1185 	}
1186 	return rc;
1187 }
1188 
1189 static void
1190 vmd_set_starting_bus_number(struct vmd_adapter *vmd, uint8_t *bus_start,
1191 			    uint8_t *max_bus)
1192 {
1193 	uint32_t vmd_cap = 0, vmd_config = 0;
1194 	uint8_t bus_restrict_cap, bus_restrictions;
1195 
1196 	spdk_pci_device_cfg_read32(vmd->pci, &vmd_cap, PCI_VMD_VMCAP);
1197 	spdk_pci_device_cfg_read32(vmd->pci, &vmd_config, PCI_VMD_VMCONFIG);
1198 
1199 	bus_restrict_cap = vmd_cap & 0x1; /* bit 0 */
1200 	bus_restrictions = (vmd_config >> 8) & 0x3; /* bits 8-9 */
1201 	if ((bus_restrict_cap == 0x1) && (bus_restrictions == 0x1)) {
1202 		*bus_start = 128;
1203 		*max_bus = 255;
1204 	} else {
1205 		*bus_start = 0;
1206 		*max_bus = 127;
1207 	}
1208 }
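
/*
 * Worked example (illustrative register values): with vmd_cap = 0x1 (bus
 * restriction capable) and vmd_config = 0x100 (restriction field = 0x1), the
 * adapter is limited to the upper half of the bus space, so scanning starts
 * at bus 128 with a maximum of 255; otherwise buses 0-127 are used.
 */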
1209 
1210 static int
1211 vmd_enumerate_devices(struct vmd_adapter *vmd)
1212 {
1213 	uint8_t max_bus, bus_start;
1214 
1215 	vmd->vmd_bus.vmd = vmd;
1216 	vmd->vmd_bus.domain = vmd->pci->addr.domain;
1217 
1218 	if (vmd->pci->id.device_id == PCI_DEVICE_ID_INTEL_VMD_ICX) {
1219 		vmd_set_starting_bus_number(vmd, &bus_start, &max_bus);
1220 		vmd->vmd_bus.bus_start = bus_start;
1221 		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = vmd->vmd_bus.bus_start;
1222 		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = vmd->vmd_bus.bus_start;
1223 		vmd->max_pci_bus = max_bus;
1224 	} else {
1225 		vmd->vmd_bus.bus_start = 0;
1226 		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
1227 		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
1228 		vmd->max_pci_bus = PCI_MAX_BUS_NUMBER;
1229 	}
1230 
1231 	return vmd_scan_pcibus(&vmd->vmd_bus);
1232 }
1233 
1234 struct vmd_pci_device *
1235 vmd_find_device(const struct spdk_pci_addr *addr)
1236 {
1237 	struct vmd_pci_bus *bus;
1238 	struct vmd_pci_device *dev;
1239 	uint32_t i;
1240 
1241 	for (i = 0; i < g_vmd_container.count; ++i) {
1242 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1243 			if (bus->self) {
1244 				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
1245 					return bus->self;
1246 				}
1247 			}
1248 
1249 			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1250 				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
1251 					return dev;
1252 				}
1253 			}
1254 		}
1255 	}
1256 
1257 	return NULL;
1258 }
1259 
1260 static int
1261 vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
1262 {
1263 	uint32_t cmd_reg = 0;
1264 	char bdf[32] = {0};
1265 	struct vmd_container *vmd_c = ctx;
1266 	size_t i;
1267 
1268 	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
1269 	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
1270 	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);
1271 
1272 	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
1273 	SPDK_INFOLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);
1274 
1275 	/* map vmd bars */
1276 	i = vmd_c->count;
1277 	vmd_c->vmd[i].pci = pci_dev;
1278 	vmd_c->vmd[i].vmd_index = i;
1279 	vmd_c->vmd[i].domain =
1280 		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
1281 	TAILQ_INIT(&vmd_c->vmd[i].bus_list);
1282 
1283 	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
1284 		return -1;
1285 	}
1286 
1287 	SPDK_INFOLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
1288 		     (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
1289 		     (uint32_t)vmd_c->vmd[i].cfgbar_size);
1290 	SPDK_INFOLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
1291 		     (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
1292 		     (uint32_t)vmd_c->vmd[i].membar_size);
1293 	SPDK_INFOLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
1294 		     (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
1295 		     (uint32_t)vmd_c->vmd[i].msixbar_size);
1296 
1297 	vmd_c->count = i + 1;
1298 
1299 	vmd_enumerate_devices(&vmd_c->vmd[i]);
1300 
1301 	return 0;
1302 }
1303 
1304 int
1305 spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
1306 {
1307 	int cnt = 0;
1308 	struct vmd_pci_bus *bus;
1309 	struct vmd_pci_device *dev;
1310 	uint32_t i;
1311 
1312 	if (!nvme_list) {
1313 		return -1;
1314 	}
1315 
1316 	for (i = 0; i < g_vmd_container.count; ++i) {
1317 		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
1318 			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1319 				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
1320 					nvme_list[cnt++] = dev->pci;
1321 					if (!dev->is_hooked) {
1322 						vmd_dev_init(dev);
1323 						dev->is_hooked = 1;
1324 					}
1325 				}
1326 			}
1327 		}
1328 	}
1329 
1330 	return cnt;
1331 }
1332 
1333 static void
1334 vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
1335 {
1336 	struct vmd_pci_device *device = bus->self;
1337 	uint16_t status __attribute__((unused));
1338 
1339 	status = device->pcie_cap->slot_status.as_uint16_t;
1340 	device->pcie_cap->slot_status.as_uint16_t = status;
1341 	status = device->pcie_cap->slot_status.as_uint16_t;
1342 
1343 	status = device->pcie_cap->link_status.as_uint16_t;
1344 	device->pcie_cap->link_status.as_uint16_t = status;
1345 	status = device->pcie_cap->link_status.as_uint16_t;
1346 }
1347 
1348 static void
1349 vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
1350 {
1351 	uint8_t num_devices, sleep_count;
1352 
1353 	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
1354 		/* Scan until a new device is found */
1355 		num_devices = vmd_scan_single_bus(bus, bus->self, true);
1356 		if (num_devices > 0) {
1357 			break;
1358 		}
1359 
1360 		spdk_delay_us(200000);
1361 	}
1362 
1363 	if (num_devices == 0) {
1364 		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
1365 	}
1366 }
1367 
1368 static void
1369 vmd_remove_device(struct vmd_pci_device *device)
1370 {
1371 	device->pci.internal.pending_removal = true;
1372 
1373 	/* If the device isn't attached, remove it immediately */
1374 	if (!device->pci.internal.attached) {
1375 		vmd_dev_detach(&device->pci);
1376 	}
1377 }
1378 
1379 static void
1380 vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
1381 {
1382 	struct vmd_pci_device *device, *tmpdev;
1383 
1384 	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
1385 		if (!vmd_bus_device_present(bus, device->devfn)) {
1386 			vmd_remove_device(device);
1387 		}
1388 	}
1389 }
1390 
1391 int
1392 spdk_vmd_hotplug_monitor(void)
1393 {
1394 	struct vmd_pci_bus *bus;
1395 	struct vmd_pci_device *device;
1396 	int num_hotplugs = 0;
1397 	uint32_t i;
1398 
1399 	for (i = 0; i < g_vmd_container.count; ++i) {
1400 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1401 			device = bus->self;
1402 			if (device == NULL || !device->hotplug_capable) {
1403 				continue;
1404 			}
1405 
1406 			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
1407 				continue;
1408 			}
1409 
1410 			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
1411 				SPDK_INFOLOG(vmd, "Device hotplug detected on bus "
1412 					     "%"PRIu32"\n", bus->bus_number);
1413 				vmd_bus_handle_hotplug(bus);
1414 			} else {
1415 				SPDK_INFOLOG(vmd, "Device hotremove detected on bus "
1416 					     "%"PRIu32"\n", bus->bus_number);
1417 				vmd_bus_handle_hotremove(bus);
1418 			}
1419 
1420 			vmd_clear_hotplug_status(bus);
1421 			num_hotplugs++;
1422 		}
1423 	}
1424 
1425 	return num_hotplugs;
1426 }
1427 
1428 int
1429 spdk_vmd_remove_device(const struct spdk_pci_addr *addr)
1430 {
1431 	struct vmd_pci_device *device;
1432 
1433 	device = vmd_find_device(addr);
1434 	if (device == NULL) {
1435 		return -ENODEV;
1436 	}
1437 
1438 	assert(strcmp(spdk_pci_device_get_type(&device->pci), "vmd") == 0);
1439 	vmd_remove_device(device);
1440 
1441 	return 0;
1442 }
1443 
1444 int
1445 spdk_vmd_rescan(void)
1446 {
1447 	struct vmd_pci_bus *bus;
1448 	uint32_t i;
1449 	int rc = 0;
1450 
1451 	for (i = 0; i < g_vmd_container.count; ++i) {
1452 		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
1453 			rc += vmd_scan_single_bus(bus, bus->self, true);
1454 		}
1455 	}
1456 
1457 	return rc;
1458 }
1459 
1460 static int
1461 vmd_attach_device(const struct spdk_pci_addr *addr)
1462 {
1463 	struct vmd_pci_bus *bus;
1464 	struct vmd_adapter *vmd;
1465 	struct vmd_pci_device *dev;
1466 	uint32_t i;
1467 	int rc;
1468 
1469 	/* VMD always sets function to zero */
1470 	if (addr->func != 0) {
1471 		return -ENODEV;
1472 	}
1473 
1474 	for (i = 0; i < g_vmd_container.count; ++i) {
1475 		vmd = &g_vmd_container.vmd[i];
1476 		if (vmd->domain != addr->domain) {
1477 			continue;
1478 		}
1479 
1480 		TAILQ_FOREACH(bus, &vmd->bus_list, tailq) {
1481 			if (bus->bus_number != addr->bus) {
1482 				continue;
1483 			}
1484 
1485 			dev = vmd_alloc_dev(bus, addr->dev);
1486 			if (dev == NULL) {
1487 				return -ENODEV;
1488 			}
1489 
1490 			/* Only allow attaching endpoint devices */
1491 			if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
1492 				free(dev);
1493 				return -ENODEV;
1494 			}
1495 
1496 			rc = vmd_init_end_device(dev);
1497 			if (rc != 0) {
1498 				free(dev);
1499 				return -ENODEV;
1500 			}
1501 
1502 			return 0;
1503 		}
1504 	}
1505 
1506 	return -ENODEV;
1507 }
1508 
1509 static void
1510 vmd_detach_device(struct spdk_pci_device *pci_dev)
1511 {
1512 	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);
1513 
1514 	assert(strcmp(spdk_pci_device_get_type(pci_dev), "vmd") == 0);
1515 	assert(vmd_find_device(&pci_dev->addr) != NULL);
1516 
1517 	vmd_remove_device(dev);
1518 }
1519 
1520 static struct spdk_pci_device_provider g_vmd_device_provider = {
1521 	.name = "vmd",
1522 	.attach_cb = vmd_attach_device,
1523 	.detach_cb = vmd_detach_device,
1524 };
1525 
1526 SPDK_PCI_REGISTER_DEVICE_PROVIDER(vmd, &g_vmd_device_provider);
1527 
1528 int
1529 spdk_vmd_init(void)
1530 {
1531 	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
1532 }
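
/*
 * Hedged usage sketch of this library's public entry points (application
 * side, excluded from the build; error handling elided):
 */
#if 0
#include "spdk/vmd.h"

static void
vmd_usage_sketch(void)
{
	if (spdk_vmd_init() != 0) {
		return;	/* no VMD adapters found or BAR mapping failed */
	}

	/* ...probe NVMe controllers behind VMD, e.g. via spdk_nvme_probe()... */

	/* poll periodically; returns the number of hotplug events handled */
	if (spdk_vmd_hotplug_monitor() > 0) {
		/* re-enumerate or notify upper layers */
	}

	spdk_vmd_fini();
}
#endif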
1533 
1534 void
1535 spdk_vmd_fini(void)
1536 {
1537 	uint32_t i;
1538 
1539 	for (i = 0; i < g_vmd_container.count; ++i) {
1540 		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
1541 	}
1542 }
1543 
1544 SPDK_LOG_REGISTER_COMPONENT(vmd)
1545