/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2019 Intel Corporation.
 *   All rights reserved.
 */

#include "vmd_internal.h"

#include "spdk/stdinc.h"
#include "spdk/string.h"
#include "spdk/likely.h"

static const char *device_type[] = {
	"PCI Express Endpoint",
	"Legacy PCI Express Endpoint",
	"Reserved 1",
	"Reserved 2",
	"Root Port of PCI Express Root Complex",
	"Upstream Port of PCI Express Switch",
	"Downstream Port of PCI Express Switch",
	"PCI Express to PCI/PCI-X Bridge",
	"PCI/PCI-X to PCI Express Bridge",
	"Root Complex Integrated Endpoint",
	"Root Complex Event Collector",
	"Reserved Capability"
};

/*
 * Container for all VMD adapters probed in the system.
 */
struct vmd_container {
	uint32_t count;
	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
};

static struct vmd_container g_vmd_container;
static uint8_t g_end_device_count;

static bool
vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
{
	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
}

static void
vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
{
	uint32_t pad;

	/*
	 * The device is not in the hot plug path; align the remaining base address from membar 1.
	 */
	if (vmd->physical_addr & (alignment - 1)) {
		pad = alignment - (vmd->physical_addr & (alignment - 1));
		vmd->physical_addr += pad;
		vmd->current_addr_size -= pad;
	}
}
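
/*
 * Worked example for vmd_align_base_addrs above (illustrative numbers only):
 * with physical_addr = 0x12345000 and alignment = ONE_MB (0x100000),
 * pad = 0x100000 - 0x45000 = 0xbb000, so the base moves up to 0x12400000
 * and current_addr_size shrinks by 0xbb000.
 */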

static bool
vmd_device_is_enumerated(volatile struct pci_header *header)
{
	return header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
	       header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
}

static bool
vmd_device_is_root_port(volatile struct pci_header *header)
{
	return header->common.vendor_id == SPDK_PCI_VID_INTEL &&
	       (header->common.device_id == PCI_ROOT_PORT_A_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_B_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_C_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_D_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_A_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_B_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_C_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_D_INTEL_ICX);
}

static void
vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
{
	struct pci_mem_mgr *region, *prev;

	do {
		prev = NULL;
		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
				break;
			}

			prev = region;
		}

		if (region != NULL) {
			prev->size += region->size;
			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
		}
	} while (region != NULL);
}
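
/*
 * Coalescing example (illustrative): free regions [0x1000, 0x1800) and
 * [0x1800, 0x2000) are merged into a single [0x1000, 0x2000) region, and the
 * emptied descriptor is parked on unused_mem_queue for reuse by later splits.
 * Because vmd_hotplug_free_region keeps the free list sorted by address,
 * adjacency can be detected with a single pass.
 */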

static void
vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
{
	struct pci_mem_mgr *current, *prev = NULL;

	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);

	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
		if (current->addr > region->addr) {
			break;
		}

		prev = current;
	}

	if (prev != NULL) {
		assert(prev->addr + prev->size <= region->addr);
		assert(current == NULL || (region->addr + region->size <= current->addr));
		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
	} else {
		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
	}

	vmd_hotplug_coalesce_regions(hp);
}

static void
vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
{
	struct pci_mem_mgr *region;

	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
		if (region->addr == addr) {
			break;
		}
	}

	assert(region != NULL);
	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);

	vmd_hotplug_free_region(hp, region);
}

static uint64_t
vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
{
	struct pci_mem_mgr *region = NULL, *free_region;

	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
		if (region->size >= size) {
			break;
		}
	}

	if (region == NULL) {
		SPDK_INFOLOG(vmd, "Unable to find free hotplug memory region of size: "
			     "%"PRIx32"\n", size);
		return 0;
	}

	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
	if (size < region->size) {
		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
		if (free_region == NULL) {
			SPDK_INFOLOG(vmd, "Unable to find unused descriptor to store the "
				     "free region of size: %"PRIu32"\n", region->size - size);
		} else {
			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
			free_region->size = region->size - size;
			free_region->addr = region->addr + size;
			region->size = size;
			vmd_hotplug_free_region(hp, free_region);
		}
	}

	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);

	return region->addr;
}
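
/*
 * First-fit example for vmd_hotplug_allocate_base_addr (illustrative): with a
 * free list of [0x1000, size 0x100) and [0x2000, size 0x1000), a request for
 * 0x200 bytes skips the first region (too small), takes the one at 0x2000,
 * splits the remainder [0x2200, size 0xe00) back onto the free list, and
 * returns 0x2000.
 */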

/*
 *  Allocates an address from the VMD membar for the requested memory size.
 *  vmd - VMD adapter object
 *  dev - vmd_pci_device to allocate a base address for.
 *  size - size of the memory window requested.
 *  Size must be a power of 2. Addresses are returned on the size boundary.
 *  Returns a physical address within the VMD membar window, or 0x0 if the window
 *  cannot be allocated. Consider increasing the size of the VMD membar if 0x0 is returned.
 */
static uint64_t
vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
{
	uint64_t base_address = 0, padding = 0;
	struct vmd_pci_bus *hp_bus;

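	/* size & (~size + 1) equals size & -size, which isolates the lowest set bit;
	 * it equals size only when size is a power of two. */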
	if (size && ((size & (~size + 1)) != size)) {
		return base_address;
	}

	/*
	 *  If the device is downstream of a hot plug port, allocate the address from the
	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to
	 *  determine whether a free range exists that satisfies the request. If a free range
	 *  cannot be found, get a buffer from the unused chunk. A first-fit algorithm is used.
	 */
	if (dev) {
		hp_bus = dev->parent;
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
		}
	}

	/* Ensure the physical membar allocated is size aligned */
	if (vmd->physical_addr & (size - 1)) {
		padding = size - (vmd->physical_addr & (size - 1));
	}

	/* Allocate from the membar if enough memory is left */
	if (vmd->current_addr_size >= size + padding) {
		base_address = vmd->physical_addr + padding;
		vmd->physical_addr += size + padding;
		vmd->current_addr_size -= size + padding;
	}

	SPDK_INFOLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);

	return base_address;
}

static bool
vmd_is_end_device(struct vmd_pci_device *dev)
{
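	/* A type 0 (normal) header denotes an endpoint; mask off the multi-function flag
	 * before comparing against the header type. */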
	return (dev && dev->header) &&
	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
}

static void
vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *bridge;

	if (base == 0 || limit == 0) {
		return;
	}

	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
		bus = dev->bus_object;
	} else {
		bus = dev->parent;
	}

	bridge = bus->self;
	SPDK_INFOLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
		     bridge->header->one.mem_limit);

	if (dev->bus->vmd->scan_completed) {
		return;
	}

	while (bus && bus->self != NULL) {
		bridge = bus->self;

		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
		if (bridge->header->one.mem_base > base) {
			bridge->header->one.mem_base = base;
			base = bridge->header->one.mem_base;
		}

		if (bridge->header->one.mem_limit < limit) {
			bridge->header->one.mem_limit = limit;
			limit = bridge->header->one.mem_limit;
		}

		bus = bus->parent;
	}
}

static uint64_t
vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
{
	struct vmd_pci_bus *bus = dev->parent;

	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
		return dev->header->zero.BAR[index] & ~0xf;
	} else {
		if (bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
		} else {
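			/* The 16-bit bridge memory base register maps to bits 31:16 of the
			 * window base (its low 4 bits read as zero), hence the shift */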
			return (uint64_t)bus->self->header->one.mem_base << 16;
		}
	}
}

static bool
vmd_assign_base_addrs(struct vmd_pci_device *dev)
{
	uint16_t mem_base = 0, mem_limit = 0;
	unsigned char mem_attr = 0;
	int last;
	struct vmd_adapter *vmd = NULL;
	bool ret_val = false;
	uint32_t bar_value;
	uint32_t table_offset;

	if (dev && dev->bus) {
		vmd = dev->bus->vmd;
	}

	if (!vmd) {
		return false;
	}

	vmd_align_base_addrs(vmd, ONE_MB);

	last = dev->header_type ? 2 : 6;
	for (int i = 0; i < last; i++) {
		bar_value = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = ~(0U);
		dev->bar[i].size = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = bar_value;

		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0 ||
		    dev->header->zero.BAR[i] & 1) {
			dev->bar[i].size = 0;
			continue;
		}
		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);

		if (vmd->scan_completed) {
			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
		} else {
			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
		}

		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;

		if (!dev->bar[i].start) {
			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
				i++;
			}
			continue;
		}

		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
			    BRIDGE_BASEREG(dev->bar[i].size - 1);
		if (!mem_base) {
			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
		}

		ret_val = true;

		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
			i++;
			if (i < last) {
				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
			}
		}
	}

	/* Enable device MEM and bus mastering */
	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
	/* Read the command register back to flush the posted config write */
	{ uint16_t cmd = dev->header->zero.command; (void)cmd; }

	if (dev->msix_cap && ret_val) {
		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
		if (dev->bar[table_offset & 0x3].vaddr) {
			dev->msix_table = (volatile struct pci_msix_table_entry *)
					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
		}
	}

	if (ret_val && vmd_is_end_device(dev)) {
		vmd_update_base_limit_register(dev, mem_base, mem_limit);
	}

	return ret_val;
}

static void
vmd_get_device_capabilities(struct vmd_pci_device *dev)
{
	volatile uint8_t *config_space;
	uint8_t capabilities_offset;
	struct pci_capabilities_header *capabilities_hdr;

	config_space = (volatile uint8_t *)dev->header;
	if ((dev->header->common.status & PCI_CAPABILITIES_LIST) == 0) {
		return;
	}

	capabilities_offset = dev->header->zero.cap_pointer;
	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
		capabilities_offset = dev->header->one.cap_pointer;
	}

	while (capabilities_offset > 0) {
		capabilities_hdr = (struct pci_capabilities_header *)
				   &config_space[capabilities_offset];
		switch (capabilities_hdr->capability_id) {
		case CAPABILITY_ID_PCI_EXPRESS:
			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
			break;

		case CAPABILITY_ID_MSI:
			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
			break;

		case CAPABILITY_ID_MSIX:
			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
			break;

		default:
			break;
		}
		capabilities_offset = capabilities_hdr->next;
	}
}

static volatile struct pci_enhanced_capability_header *
vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
{
	uint8_t *data;
	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;

	data = (uint8_t *)dev->header;
	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
		if (cap_hdr->capability_id == capability_id) {
			return cap_hdr;
		}
		cap_offset = cap_hdr->next;
		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
			break;
		}
	}

	return NULL;
}

static void
vmd_read_config_space(struct vmd_pci_device *dev)
{
	/*
	 * Writes to the PCI config space are posted writes. To ensure a transaction reaches its
	 * destination before another write is posted, an immediate read of the written value
	 * should be performed.
	 */
	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
	{ uint16_t cmd = dev->header->common.command; (void)cmd; }

	vmd_get_device_capabilities(dev);
	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
			DEVICE_SERIAL_NUMBER_CAP_ID);
}

static void
vmd_update_scan_info(struct vmd_pci_device *dev)
{
	struct vmd_adapter *vmd_adapter = dev->bus->vmd;

	if (vmd_adapter->root_port_updated) {
		return;
	}

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	if (vmd_device_is_root_port(dev->header)) {
		vmd_adapter->root_port_updated = 1;
		SPDK_INFOLOG(vmd, "root_port_updated = %d\n",
			     vmd_adapter->root_port_updated);
		SPDK_INFOLOG(vmd, "upper:limit = %x : %x\n",
			     dev->header->one.prefetch_base_upper,
			     dev->header->one.prefetch_limit_upper);
		if (vmd_device_is_enumerated(dev->header)) {
			vmd_adapter->scan_completed = 1;
			SPDK_INFOLOG(vmd, "scan_completed = %d\n",
				     vmd_adapter->scan_completed);
		}
	}
}

static void
vmd_reset_base_limit_registers(volatile struct pci_header *header)
{
	uint32_t reg __attribute__((unused));

	/*
	 * Writes to the PCI config space are posted writes.
	 * To ensure a transaction reaches its destination
	 * before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
	header->one.mem_base = 0xfff0;
	reg = header->one.mem_base;
	header->one.mem_limit = 0x0;
	reg = header->one.mem_limit;
	header->one.prefetch_base = 0x0;
	reg = header->one.prefetch_base;
	header->one.prefetch_limit = 0x0;
	reg = header->one.prefetch_limit;
	header->one.prefetch_base_upper = 0x0;
	reg = header->one.prefetch_base_upper;
	header->one.prefetch_limit_upper = 0x0;
	reg = header->one.prefetch_limit_upper;
	header->one.io_base_upper = 0x0;
	reg = header->one.io_base_upper;
	header->one.io_limit_upper = 0x0;
	reg = header->one.io_limit_upper;
	header->one.primary = 0;
	reg = header->one.primary;
	header->one.secondary = 0;
	reg = header->one.secondary;
	header->one.subordinate = 0;
	reg = header->one.subordinate;
}

static void
vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
{
	struct vmd_adapter *vmd = bus->vmd;
	struct vmd_hot_plug *hp = &dev->hp;
	size_t mem_id;

	dev->hotplug_capable = true;
	hp->bar.size = 1 << 20;

	if (!vmd->scan_completed) {
		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
		bus->self->header->one.mem_limit =
			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
	} else {
		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
	}

	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);

	TAILQ_INIT(&hp->free_mem_queue);
	TAILQ_INIT(&hp->unused_mem_queue);
	TAILQ_INIT(&hp->alloc_mem_queue);

	hp->mem[0].size = hp->bar.size;
	hp->mem[0].addr = hp->bar.start;

	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);

	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
	}

	SPDK_INFOLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
		     bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
}

static bool
vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
{
	volatile struct pci_header *header;

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
		return false;
	}

	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
		return false;
	}

	return true;
}

static struct vmd_pci_device *
vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
{
	struct vmd_pci_device *dev = NULL;
	volatile struct pci_header *header;
	uint8_t header_type;
	uint32_t rev_class;

	/* Make sure we're not creating two devices on the same dev/fn */
	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
		if (dev->devfn == devfn) {
			return NULL;
		}
	}

	if (!vmd_bus_device_present(bus, devfn)) {
		return NULL;
	}

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));

	SPDK_INFOLOG(vmd, "PCI device found: %04x:%04x ***\n",
		     header->common.vendor_id, header->common.device_id);

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return NULL;
	}

	dev->header = header;
	dev->vid = dev->header->common.vendor_id;
	dev->did = dev->header->common.device_id;
	dev->bus = bus;
	dev->parent = bus;
	dev->devfn = devfn;
	header_type = dev->header->common.header_type;
	rev_class = dev->header->common.rev_class;
	dev->class = rev_class >> 8;
	dev->header_type = header_type & 0x7;

	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
		vmd_update_scan_info(dev);
		if (!dev->bus->vmd->scan_completed) {
			vmd_reset_base_limit_registers(dev->header);
		}
	}

	vmd_read_config_space(dev);

	return dev;
}

static struct vmd_pci_bus *
vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
{
	struct vmd_pci_bus *new_bus;

	new_bus = calloc(1, sizeof(*new_bus));
	if (!new_bus) {
		return NULL;
	}

	new_bus->parent = parent;
	new_bus->domain = parent->domain;
	new_bus->bus_number = bus_number;
	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
	new_bus->self = bridge;
	new_bus->vmd = parent->vmd;
	new_bus->config_bus_number = new_bus->bus_number - new_bus->vmd->vmd_bus.bus_start;
	TAILQ_INIT(&new_bus->dev_list);

	bridge->subordinate = new_bus;

	bridge->pci.addr.bus = new_bus->bus_number;
	bridge->pci.addr.dev = bridge->devfn;
	bridge->pci.addr.func = 0;
	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;

	return new_bus;
}

static uint8_t
vmd_get_next_bus_number(struct vmd_adapter *vmd)
{
	uint8_t bus = 0xff;

	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
		bus = vmd->next_bus_number;
		vmd->next_bus_number++;
	}

	return bus;
}

static uint8_t
vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
{
	uint8_t bus_number = 0xff;

	if (dev && dev->bus && dev->bus->vmd &&
	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
		bus_number = RESERVED_HOTPLUG_BUSES;
		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
	}

	return bus_number;
}

static void
vmd_enable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

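	/*
	 * In the MSI-X message control register, bit 14 is the function mask and bit 15
	 * is the MSI-X enable bit. Each write below is followed by a read so the posted
	 * config write is flushed before the next one: mask the function, enable MSI-X,
	 * then unmask.
	 */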
	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
	control = dev->msix_cap->message_control.as_uint16_t;
	control = control & ~(1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

static void
vmd_disable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

/*
 * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for
 * the port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
 */
static void
vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
{
	int entry;

	if (!dev || !vmdEntry || !dev->msix_cap) {
		return;
	}

	vmd_disable_msix(dev);
	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
		return;
	}

	for (entry = 0; entry < dev->msix_table_size; ++entry) {
		dev->msix_table[entry].vector_control = 1;
	}
	vmd_enable_msix(dev);
}

static void
vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
{
	/* Update the subordinate bus of all bridges above this bridge */
	volatile struct vmd_pci_device *dev = bridge;
	uint8_t subordinate_bus;

	if (!dev) {
		return;
	}
	subordinate_bus = bridge->header->one.subordinate;
	while (dev->parent_bridge != NULL) {
		dev = dev->parent_bridge;
		if (dev->header->one.subordinate < subordinate_bus) {
			dev->header->one.subordinate = subordinate_bus;
			subordinate_bus = dev->header->one.subordinate;
		}
	}
}

static bool
vmd_is_supported_device(struct vmd_pci_device *dev)
{
	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
}

static int
vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);

	*size = dev->bar[bar].size;
	*phys_addr = dev->bar[bar].start;
	*mapped_addr = (void *)dev->bar[bar].vaddr;

	return 0;
}

static int
vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
{
	return 0;
}

static int
vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
		 uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *src = (volatile uint8_t *)dev->header;
	uint8_t *dst = value;
	size_t i;

	if (len + offset > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[i] = src[offset + i];
	}

	return 0;
}

static int
vmd_dev_cfg_write(struct spdk_pci_device *_dev, void *value,
		  uint32_t len, uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
	uint8_t *src = value;
	size_t i;

	if ((len + offset) > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[offset + i] = src[i];
	}

	return 0;
}

static void
vmd_dev_free(struct vmd_pci_device *dev)
{
	struct vmd_pci_device *bus_device = dev->bus->self;
	size_t i, num_bars = dev->header_type ? 2 : 6;

	/* Release the hotplug region if the device is under a hotplug-capable bus */
	if (bus_device && bus_device->hotplug_capable) {
		for (i = 0; i < num_bars; ++i) {
			if (dev->bar[i].start != 0) {
				vmd_hotplug_free_addr(&bus_device->hp, dev->bar[i].start);
			}
		}
	}

	free(dev);
}

static void
vmd_dev_detach(struct spdk_pci_device *dev)
{
	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
	struct vmd_pci_bus *bus = vmd_device->bus;

	spdk_pci_unhook_device(dev);
	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);

	vmd_dev_free(vmd_device);
}

static void
vmd_dev_init(struct vmd_pci_device *dev)
{
	dev->pci.addr.domain = dev->bus->vmd->domain;
	dev->pci.addr.bus = dev->bus->bus_number;
	dev->pci.addr.dev = dev->devfn;
	dev->pci.addr.func = 0;
	dev->pci.socket_id = spdk_pci_device_get_socket_id(dev->bus->vmd->pci);
	dev->pci.id.vendor_id = dev->header->common.vendor_id;
	dev->pci.id.device_id = dev->header->common.device_id;
	dev->pci.type = "vmd";
	dev->pci.map_bar = vmd_dev_map_bar;
	dev->pci.unmap_bar = vmd_dev_unmap_bar;
	dev->pci.cfg_read = vmd_dev_cfg_read;
	dev->pci.cfg_write = vmd_dev_cfg_write;
	dev->hotplug_capable = false;
	if (dev->pcie_cap != NULL) {
		dev->cached_slot_control = dev->pcie_cap->slot_control;
	}
}

static int
vmd_init_end_device(struct vmd_pci_device *dev)
{
	struct vmd_pci_bus *bus = dev->bus;
	struct vmd_adapter *vmd;
	struct spdk_pci_driver *driver;
	char bdf[32];
	int rc;

	if (!vmd_assign_base_addrs(dev)) {
		SPDK_ERRLOG("Failed to allocate BARs for device: %p\n", dev);
		return -1;
	}

	vmd_setup_msix(dev, &bus->vmd->msix_table[0]);
	vmd_dev_init(dev);

	if (vmd_is_supported_device(dev)) {
		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
		SPDK_INFOLOG(vmd, "Initializing NVMe device at %s\n", bdf);
		dev->pci.parent = dev->bus->vmd->pci;

		driver = spdk_pci_nvme_get_driver();
		assert(driver != NULL);
		rc = spdk_pci_hook_device(driver, &dev->pci);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to hook device %s: %s\n", bdf, spdk_strerror(-rc));
			return -1;
		}

		vmd = bus->vmd;
		vmd->target[vmd->nvme_count] = dev;
		vmd->nvme_count++;
	}

	/* Attach the device to the current bus */
	TAILQ_INSERT_TAIL(&bus->dev_list, dev, tailq);
	g_end_device_count++;

	return 0;
}

/*
 * Scans a single bus for all attached devices and returns a count of how many
 * devices were found. In the VMD topology, it is assumed there are no multi-function
 * devices. Hence a bus (bridge) will not have a multi-function device with both type 0
 * and type 1 headers.
 *
 * Another option for implementing this function is to make the bus an int and
 * create a new device PciBridge. PciBridge would inherit from PciDevice with extra fields,
 * sub/pri/sec bus. The input becomes PciPort, bus number and parent_bridge.
 *
 * The bus number is scanned and, if a device is found, either a PciBridge (type 1 header)
 * or a PciDevice (type 0 header) is created based on the header_type.
 *
 * If a PciBridge, assign bus numbers and rescan the new bus. The PciBridge currently
 * being scanned becomes the passed-in parent_bridge with the new bus number.
 *
 * The linked list becomes a list of PciBridges with PciDevices attached.
 *
 * Returns a count of how many devices were found (type 1 + type 0 header devices).
 */
static uint8_t
vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge, bool hotplug)
{
	/* assuming only single function devices are on the bus */
	struct vmd_pci_device *new_dev;
	union express_slot_capabilities_register slot_cap;
	struct vmd_pci_bus *new_bus;
	uint8_t device_number, dev_cnt = 0;
	uint8_t new_bus_num;
	int rc;

	for (device_number = 0; device_number < 32; device_number++) {
		new_dev = vmd_alloc_dev(bus, device_number);
		if (new_dev == NULL) {
			continue;
		}

		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
			if (hotplug) {
				free(new_dev);
				continue;
			}

			slot_cap.as_uint32_t = 0;
			if (new_dev->pcie_cap != NULL) {
				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
			}

			new_bus_num = vmd_get_next_bus_number(bus->vmd);
			if (new_bus_num == 0xff) {
				vmd_dev_free(new_dev);
				return dev_cnt;
			}
			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
			if (!new_bus) {
				vmd_dev_free(new_dev);
				return dev_cnt;
			}
			new_bus->primary_bus = bus->secondary_bus;
			new_bus->self = new_dev;
			new_dev->bus_object = new_bus;

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
				new_bus->subordinate_bus += new_bus->hotplug_buses;

				/* Attach hot plug instance if HP is supported */
				/* Hot inserted SSDs can be assigned a port bus of subordinate + 1 */
				SPDK_INFOLOG(vmd, "hotplug_capable/slot_implemented = "
					     "%x:%x\n", slot_cap.bit_field.hotplug_capable,
					     new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
			}

			new_dev->parent_bridge = parent_bridge;
			new_dev->header->one.primary = new_bus->primary_bus;
			new_dev->header->one.secondary = new_bus->secondary_bus;
			new_dev->header->one.subordinate = new_bus->subordinate_bus;

			vmd_bus_update_bridge_info(new_dev);
			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);

			vmd_dev_init(new_dev);
			dev_cnt++;

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				vmd_init_hotplug(new_dev, new_bus);
			}

			dev_cnt += vmd_scan_single_bus(new_bus, new_dev, hotplug);
			if (new_dev->pcie_cap != NULL) {
				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
					return dev_cnt;
				}
			}
		} else {
			rc = vmd_init_end_device(new_dev);
			if (rc != 0) {
				vmd_dev_free(new_dev);
			} else {
				dev_cnt++;
			}
		}
	}

	return dev_cnt;
}

static void
vmd_print_pci_info(struct vmd_pci_device *dev)
{
	if (!dev) {
		return;
	}

	if (dev->pcie_cap != NULL) {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
			     dev->header->common.vendor_id, dev->header->common.device_id,
			     dev->pcie_cap->express_cap_register.bit_field.device_type,
			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
	} else {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
			     dev->header->common.vendor_id, dev->header->common.device_id);
	}

	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);

	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
	}

	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
			}
		}
	}

	if (dev->sn_cap != NULL) {
		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;

		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
	}
}

static void
vmd_cache_scan_info(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	SPDK_INFOLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
		     dev->header->common.device_id);

	if (vmd_device_is_root_port(dev->header)) {
		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
		reg = dev->header->one.prefetch_base_upper;
		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
		reg = dev->header->one.prefetch_limit_upper;

		SPDK_INFOLOG(vmd, "prefetch: %x:%x\n",
			     dev->header->one.prefetch_base_upper,
			     dev->header->one.prefetch_limit_upper);
	}
}

static void
vmd_reset_root_ports(struct vmd_pci_bus *bus)
{
	volatile struct pci_header *header;
	uint32_t devfn;

	/*
	 * The root ports might have been configured by some other driver (e.g. the Linux kernel)
	 * prior to loading the SPDK one, so we need to clear their configuration. We need to do
	 * this before the scanning process, as it's depth-first, so when scanning the initial
	 * root ports, the latter ones might still be using stale configuration. This can lead
	 * to two bridges having the same secondary/subordinate bus configuration, which, of
	 * course, isn't correct.
	 * (Note: this fixed issue #2413.)
	 */
	for (devfn = 0; devfn < 32; ++devfn) {
		if (!vmd_bus_device_present(bus, devfn)) {
			continue;
		}

		header = (volatile void *)(bus->vmd->cfg_vaddr +
					   CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
		if (vmd_device_is_root_port(header) && !vmd_device_is_enumerated(header)) {
			vmd_reset_base_limit_registers(header);
		}
	}
}

static uint8_t
vmd_scan_pcibus(struct vmd_pci_bus *bus)
{
	struct vmd_pci_bus *bus_entry;
	struct vmd_pci_device *dev;
	uint8_t dev_cnt;

	vmd_reset_root_ports(bus);

	g_end_device_count = 0;
	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
	bus->vmd->next_bus_number = bus->bus_number + 1;
	dev_cnt = vmd_scan_single_bus(bus, NULL, false);

	SPDK_INFOLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
	SPDK_INFOLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);

	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);

	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
		if (bus_entry->self != NULL) {
			vmd_print_pci_info(bus_entry->self);
			vmd_cache_scan_info(bus_entry->self);
		}

		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
			vmd_print_pci_info(dev);
		}
	}

	return dev_cnt;
}

static int
vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
{
	int rc;

	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
				     &vmd->cfgbar, &vmd->cfgbar_size);
	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
					     &vmd->membar, &vmd->membar_size);
	}

	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
					     &vmd->msixbar, &vmd->msixbar_size);
	}

	if (rc == 0) {
		vmd->physical_addr = vmd->membar;
		vmd->current_addr_size = vmd->membar_size;
	}
	return rc;
}

static void
vmd_set_starting_bus_number(struct vmd_adapter *vmd, uint8_t *bus_start,
			    uint8_t *max_bus)
{
	uint32_t vmd_cap = 0, vmd_config = 0;
	uint8_t bus_restrict_cap, bus_restrictions;

	spdk_pci_device_cfg_read32(vmd->pci, &vmd_cap, PCI_VMD_VMCAP);
	spdk_pci_device_cfg_read32(vmd->pci, &vmd_config, PCI_VMD_VMCONFIG);

	bus_restrict_cap = vmd_cap & 0x1; /* bit 0 */
	bus_restrictions = (vmd_config >> 8) & 0x3; /* bits 8-9 */
	if ((bus_restrict_cap == 0x1) && (bus_restrictions == 0x1)) {
		*bus_start = 128;
		*max_bus = 255;
	} else {
		*bus_start = 0;
		*max_bus = 127;
	}
}

static int
vmd_enumerate_devices(struct vmd_adapter *vmd)
{
	uint8_t max_bus, bus_start;

	vmd->vmd_bus.vmd = vmd;
	vmd->vmd_bus.domain = vmd->pci->addr.domain;

	if (vmd->pci->id.device_id == PCI_DEVICE_ID_INTEL_VMD_ICX) {
		vmd_set_starting_bus_number(vmd, &bus_start, &max_bus);
		vmd->vmd_bus.bus_start = bus_start;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = vmd->vmd_bus.bus_start;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = vmd->vmd_bus.bus_start;
		vmd->max_pci_bus = max_bus;
	} else {
		vmd->vmd_bus.bus_start = 0;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
		vmd->max_pci_bus = PCI_MAX_BUS_NUMBER;
	}

	return vmd_scan_pcibus(&vmd->vmd_bus);
}

struct vmd_pci_device *
vmd_find_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			if (bus->self) {
				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
					return bus->self;
				}
			}

			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
					return dev;
				}
			}
		}
	}

	return NULL;
}

static int
vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	uint32_t cmd_reg = 0;
	char bdf[32] = {0};
	struct vmd_container *vmd_c = ctx;
	size_t i;

	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);

	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
	SPDK_INFOLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);

	/* map vmd bars */
	i = vmd_c->count;
	vmd_c->vmd[i].pci = pci_dev;
	vmd_c->vmd[i].vmd_index = i;
	vmd_c->vmd[i].domain =
		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
	TAILQ_INIT(&vmd_c->vmd[i].bus_list);

	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
		return -1;
	}

	SPDK_INFOLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
		     (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
		     (uint32_t)vmd_c->vmd[i].cfgbar_size);
	SPDK_INFOLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
		     (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
		     (uint32_t)vmd_c->vmd[i].membar_size);
	SPDK_INFOLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
		     (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
		     (uint32_t)vmd_c->vmd[i].msixbar_size);

	vmd_c->count = i + 1;

	vmd_enumerate_devices(&vmd_c->vmd[i]);

	return 0;
}

int
spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
{
	int cnt = 0;
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;
	uint32_t i;

	if (!nvme_list) {
		return -1;
	}

	for (i = 0; i < g_vmd_container.count; ++i) {
		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
					nvme_list[cnt++] = dev->pci;
					if (!dev->is_hooked) {
						vmd_dev_init(dev);
						dev->is_hooked = 1;
					}
				}
			}
		}
	}

	return cnt;
}

static void
vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device = bus->self;
	uint16_t status __attribute__((unused));

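	/*
	 * The slot and link status bits are RW1C (write 1 to clear): reading each
	 * register and writing the same value back clears any status bits that were
	 * set, and the final read flushes the posted config write.
	 */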
	status = device->pcie_cap->slot_status.as_uint16_t;
	device->pcie_cap->slot_status.as_uint16_t = status;
	status = device->pcie_cap->slot_status.as_uint16_t;

	status = device->pcie_cap->link_status.as_uint16_t;
	device->pcie_cap->link_status.as_uint16_t = status;
	status = device->pcie_cap->link_status.as_uint16_t;
}

static void
vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
{
	uint8_t num_devices, sleep_count;

	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
		/* Scan until a new device is found */
		num_devices = vmd_scan_single_bus(bus, bus->self, true);
		if (num_devices > 0) {
			break;
		}

		spdk_delay_us(200000);
	}

	if (num_devices == 0) {
		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
	}
}

static void
vmd_remove_device(struct vmd_pci_device *device)
{
	device->pci.internal.pending_removal = true;

	/* If the device isn't attached, remove it immediately */
	if (!device->pci.internal.attached) {
		vmd_dev_detach(&device->pci);
	}
}

static void
vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device, *tmpdev;

	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
		if (!vmd_bus_device_present(bus, device->devfn)) {
			vmd_remove_device(device);
		}
	}
}

int
spdk_vmd_hotplug_monitor(void)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *device;
	int num_hotplugs = 0;
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			device = bus->self;
			if (device == NULL || !device->hotplug_capable) {
				continue;
			}

			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
				continue;
			}

			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
				SPDK_INFOLOG(vmd, "Device hotplug detected on bus "
					     "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotplug(bus);
			} else {
				SPDK_INFOLOG(vmd, "Device hotremove detected on bus "
					     "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotremove(bus);
			}

			vmd_clear_hotplug_status(bus);
			num_hotplugs++;
		}
	}

	return num_hotplugs;
}

int
spdk_vmd_remove_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_device *device;

	device = vmd_find_device(addr);
	if (device == NULL) {
		return -ENODEV;
	}

	assert(strcmp(spdk_pci_device_get_type(&device->pci), "vmd") == 0);
	vmd_remove_device(device);

	return 0;
}

int
spdk_vmd_rescan(void)
{
	struct vmd_pci_bus *bus;
	uint32_t i;
	int rc = 0;

	for (i = 0; i < g_vmd_container.count; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			rc += vmd_scan_single_bus(bus, bus->self, true);
		}
	}

	return rc;
}

static int
vmd_attach_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_bus *bus;
	struct vmd_adapter *vmd;
	struct vmd_pci_device *dev;
	uint32_t i;
	int rc;

	/* VMD always sets function to zero */
	if (addr->func != 0) {
		return -ENODEV;
	}

	for (i = 0; i < g_vmd_container.count; ++i) {
		vmd = &g_vmd_container.vmd[i];
		if (vmd->domain != addr->domain) {
			continue;
		}

		TAILQ_FOREACH(bus, &vmd->bus_list, tailq) {
			if (bus->bus_number != addr->bus) {
				continue;
			}

			dev = vmd_alloc_dev(bus, addr->dev);
			if (dev == NULL) {
				return -ENODEV;
			}

			/* Only allow attaching endpoint devices */
			if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
				free(dev);
				return -ENODEV;
			}

			rc = vmd_init_end_device(dev);
			if (rc != 0) {
				free(dev);
				return -ENODEV;
			}

			return 0;
		}
	}

	return -ENODEV;
}

static void
vmd_detach_device(struct spdk_pci_device *pci_dev)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);

	assert(strcmp(spdk_pci_device_get_type(pci_dev), "vmd") == 0);
	assert(vmd_find_device(&pci_dev->addr) != NULL);

	vmd_remove_device(dev);
}

static struct spdk_pci_device_provider g_vmd_device_provider = {
	.name = "vmd",
	.attach_cb = vmd_attach_device,
	.detach_cb = vmd_detach_device,
};

SPDK_PCI_REGISTER_DEVICE_PROVIDER(vmd, &g_vmd_device_provider);

int
spdk_vmd_init(void)
{
	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
}
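
/*
 * Typical usage sketch (illustrative only, not part of this file): an application
 * calls spdk_vmd_init() once after environment initialization to enumerate the
 * devices behind all VMD adapters, periodically polls spdk_vmd_hotplug_monitor()
 * to service hot insert/remove events, and calls spdk_vmd_fini() on shutdown:
 *
 *	if (spdk_vmd_init() == 0) {
 *		...
 *		num_events = spdk_vmd_hotplug_monitor();
 *		...
 *		spdk_vmd_fini();
 *	}
 */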

void
spdk_vmd_fini(void)
{
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
	}
}

SPDK_LOG_REGISTER_COMPONENT(vmd)