/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "vmd.h"

#include "spdk/stdinc.h"
#include "spdk/likely.h"

static const char *device_type[] = {
	"PCI Express Endpoint",
	"Legacy PCI Express Endpoint",
	"Reserved 1",
	"Reserved 2",
	"Root Port of PCI Express Root Complex",
	"Upstream Port of PCI Express Switch",
	"Downstream Port of PCI Express Switch",
	"PCI Express to PCI/PCI-X Bridge",
	"PCI/PCI-X to PCI Express Bridge",
	"Root Complex Integrated Endpoint",
	"Root Complex Event Collector",
	"Reserved Capability"
};

/*
 * Container for all VMD adapters probed in the system.
 */
struct vmd_container {
	uint32_t count;
	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
};

static struct vmd_container g_vmd_container;
static uint8_t g_end_device_count;

static bool
vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
{
	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
}

static void
vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
{
	uint32_t pad;

	/*
	 * The device is not in the hot plug path; align the base address remaining from membar 1.
	 */
	if (vmd->physical_addr & (alignment - 1)) {
		pad = alignment - (vmd->physical_addr & (alignment - 1));
		vmd->physical_addr += pad;
		vmd->current_addr_size -= pad;
	}
}
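
/*
 * Worked example of the alignment above (illustrative values only): with
 * physical_addr = 0x12345678 and alignment = 1 MB (0x100000),
 * physical_addr & (alignment - 1) = 0x45678, so pad = 0x100000 - 0x45678 =
 * 0xba988 and physical_addr moves to 0x12400000, the next 1 MB boundary.
 * current_addr_size shrinks by the same pad.
 */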

static bool
vmd_device_is_enumerated(volatile struct pci_header *header)
{
	return header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
	       header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
}

static bool
vmd_device_is_root_port(volatile struct pci_header *header)
{
	return header->common.vendor_id == SPDK_PCI_VID_INTEL &&
	       (header->common.device_id == PCI_ROOT_PORT_A_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_B_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_C_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_D_INTEL_SKX ||
		header->common.device_id == PCI_ROOT_PORT_A_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_B_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_C_INTEL_ICX ||
		header->common.device_id == PCI_ROOT_PORT_D_INTEL_ICX);
}

static void
vmd_hotplug_coalesce_regions(struct vmd_hot_plug *hp)
{
	struct pci_mem_mgr *region, *prev;

	do {
		prev = NULL;
		TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
			if (prev != NULL && (prev->addr + prev->size == region->addr)) {
				break;
			}

			prev = region;
		}

		if (region != NULL) {
			prev->size += region->size;
			TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
			TAILQ_INSERT_TAIL(&hp->unused_mem_queue, region, tailq);
		}
	} while (region != NULL);
}
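
/*
 * For example (hypothetical addresses): free regions [0x1000, +0x200] and
 * [0x1200, +0x100] are adjacent, so a pass above grows the first region to
 * 0x300 bytes and returns the second descriptor to the unused queue. The
 * outer loop repeats until a full pass finds no adjacent pair.
 */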

static void
vmd_hotplug_free_region(struct vmd_hot_plug *hp, struct pci_mem_mgr *region)
{
	struct pci_mem_mgr *current, *prev = NULL;

	assert(region->addr >= hp->bar.start && region->addr < hp->bar.start + hp->bar.size);

	TAILQ_FOREACH(current, &hp->free_mem_queue, tailq) {
		if (current->addr > region->addr) {
			break;
		}

		prev = current;
	}

	if (prev != NULL) {
		assert(prev->addr + prev->size <= region->addr);
		assert(current == NULL || (region->addr + region->size <= current->addr));
		TAILQ_INSERT_AFTER(&hp->free_mem_queue, prev, region, tailq);
	} else {
		TAILQ_INSERT_HEAD(&hp->free_mem_queue, region, tailq);
	}

	vmd_hotplug_coalesce_regions(hp);
}

static void
vmd_hotplug_free_addr(struct vmd_hot_plug *hp, uint64_t addr)
{
	struct pci_mem_mgr *region;

	TAILQ_FOREACH(region, &hp->alloc_mem_queue, tailq) {
		if (region->addr == addr) {
			break;
		}
	}

	assert(region != NULL);
	TAILQ_REMOVE(&hp->alloc_mem_queue, region, tailq);

	vmd_hotplug_free_region(hp, region);
}

static uint64_t
vmd_hotplug_allocate_base_addr(struct vmd_hot_plug *hp, uint32_t size)
{
	struct pci_mem_mgr *region = NULL, *free_region;

	TAILQ_FOREACH(region, &hp->free_mem_queue, tailq) {
		if (region->size >= size) {
			break;
		}
	}

	if (region == NULL) {
		SPDK_INFOLOG(vmd, "Unable to find free hotplug memory region of size:"
			     "%"PRIx32"\n", size);
		return 0;
	}

	TAILQ_REMOVE(&hp->free_mem_queue, region, tailq);
	if (size < region->size) {
		free_region = TAILQ_FIRST(&hp->unused_mem_queue);
		if (free_region == NULL) {
			SPDK_INFOLOG(vmd, "Unable to find unused descriptor to store the "
				     "free region of size: %"PRIu32"\n", region->size - size);
		} else {
			TAILQ_REMOVE(&hp->unused_mem_queue, free_region, tailq);
			free_region->size = region->size - size;
			free_region->addr = region->addr + size;
			region->size = size;
			vmd_hotplug_free_region(hp, free_region);
		}
	}

	TAILQ_INSERT_TAIL(&hp->alloc_mem_queue, region, tailq);

	return region->addr;
}
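
/*
 * First-fit example (hypothetical sizes): with free regions of 0x2000 and
 * 0x8000 bytes, a request for 0x4000 skips the first region and takes the
 * second. The 0x4000 bytes left over are carved into a spare descriptor
 * taken from the unused queue and returned to the free queue, so a later
 * request can still use them.
 */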

/*
 *  Allocates an address from the VMD membar for the input memory size.
 *  vmd - vmd adapter object
 *  dev - vmd_pci_device to allocate a base address for.
 *  size - size of the memory window requested.
 *  Size must be a power of 2. Addresses are returned on the size boundary.
 *  Returns a physical address within the VMD membar window, or 0x0 if the window
 *  cannot be allocated. Consider increasing the size of the VMD membar if 0x0 is returned.
 */
static uint64_t
vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
{
	uint64_t base_address = 0, padding = 0;
	struct vmd_pci_bus *hp_bus;

	/* size & -size isolates the lowest set bit; it equals size only for powers of 2 */
	if (size && ((size & (~size + 1)) != size)) {
		return base_address;
	}

	/*
	 *  If the device is downstream of a hot plug port, allocate the address from the
	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to
	 *  determine if a free range exists that satisfies the request. If a free range
	 *  cannot be found, get a buffer from the unused chunk. A first-fit algorithm is used.
	 */
	if (dev) {
		hp_bus = dev->parent;
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&hp_bus->self->hp, size);
		}
	}

	/* Ensure the allocated physical membar address is size aligned */
	if (vmd->physical_addr & (size - 1)) {
		padding = size - (vmd->physical_addr & (size - 1));
	}

	/* Allocate from the membar if enough memory is left */
	if (vmd->current_addr_size >= size + padding) {
		base_address = vmd->physical_addr + padding;
		vmd->physical_addr += size + padding;
		vmd->current_addr_size -= size + padding;
	}

	SPDK_INFOLOG(vmd, "allocated(size) %" PRIx64 " (%x)\n", base_address, size);

	return base_address;
}

static bool
vmd_is_end_device(struct vmd_pci_device *dev)
{
	return (dev && dev->header) &&
	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
}

static void
vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *bridge;

	if (base == 0 || limit == 0) {
		return;
	}

	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
		bus = dev->bus_object;
	} else {
		bus = dev->parent;
	}

	bridge = bus->self;
	SPDK_INFOLOG(vmd, "base:limit = %x:%x\n", bridge->header->one.mem_base,
		     bridge->header->one.mem_limit);

	if (dev->bus->vmd->scan_completed) {
		return;
	}

	while (bus && bus->self != NULL) {
		bridge = bus->self;

		/* This is only for 32-bit memory space, need to revisit to support 64-bit */
		if (bridge->header->one.mem_base > base) {
			bridge->header->one.mem_base = base;
			base = bridge->header->one.mem_base;
		}

		if (bridge->header->one.mem_limit < limit) {
			bridge->header->one.mem_limit = limit;
			limit = bridge->header->one.mem_limit;
		}

		bus = bus->parent;
	}
}

static uint64_t
vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index, uint32_t size)
{
	struct vmd_pci_bus *bus = dev->parent;

	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
		return dev->header->zero.BAR[index] & ~0xf;
	} else {
		if (bus->self->hotplug_capable) {
			return vmd_hotplug_allocate_base_addr(&bus->self->hp, size);
		} else {
			return (uint64_t)bus->self->header->one.mem_base << 16;
		}
	}
}

static bool
vmd_assign_base_addrs(struct vmd_pci_device *dev)
{
	uint16_t mem_base = 0, mem_limit = 0;
	unsigned char mem_attr = 0;
	int last;
	struct vmd_adapter *vmd = NULL;
	bool ret_val = false;
	uint32_t bar_value;
	uint32_t table_offset;

	if (dev && dev->bus) {
		vmd = dev->bus->vmd;
	}

	if (!vmd) {
		return false;
	}

	vmd_align_base_addrs(vmd, ONE_MB);

	last = dev->header_type ? 2 : 6;
	for (int i = 0; i < last; i++) {
		bar_value = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = ~(0U);
		dev->bar[i].size = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = bar_value;

		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0 ||
		    dev->header->zero.BAR[i] & 1) {
			dev->bar[i].size = 0;
			continue;
		}
		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);

		if (vmd->scan_completed) {
			dev->bar[i].start = vmd_get_base_addr(dev, i, dev->bar[i].size);
		} else {
			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
		}

		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;

		if (!dev->bar[i].start) {
			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
				i++;
			}
			continue;
		}

		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
			    BRIDGE_BASEREG(dev->bar[i].size - 1);
		if (!mem_base) {
			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
		}

		ret_val = true;

		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
			i++;
			if (i < last) {
				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
			}
		}
	}

	/* Enable device MEM and bus mastering */
	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
	/*
	 * Config space writes are posted; read the command register back to
	 * ensure the write reached the device before continuing.
	 */
	{
		uint16_t cmd = dev->header->zero.command;
		(void)cmd;
	}

	if (dev->msix_cap && ret_val) {
		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
		if (dev->bar[table_offset & 0x3].vaddr) {
			dev->msix_table = (volatile struct pci_msix_table_entry *)
					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
		}
	}

	if (ret_val && vmd_is_end_device(dev)) {
		vmd_update_base_limit_register(dev, mem_base, mem_limit);
	}

	return ret_val;
}
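
/*
 * BAR sizing example for the probe sequence above (illustrative values,
 * assuming TWOS_COMPLEMENT(x) computes ~x + 1): writing ~0U to a BAR and
 * reading back 0xffffc00c would, after masking, size the window as 0x4000
 * bytes (16 KB), with the low attribute bits marking a 64-bit prefetchable
 * BAR. The original BAR value is restored before the newly allocated
 * address is written.
 */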

static void
vmd_get_device_capabilities(struct vmd_pci_device *dev)
{
	volatile uint8_t *config_space;
	uint8_t capabilities_offset;
	struct pci_capabilities_header *capabilities_hdr;

	config_space = (volatile uint8_t *)dev->header;
	if ((dev->header->common.status & PCI_CAPABILITIES_LIST) == 0) {
		return;
	}

	capabilities_offset = dev->header->zero.cap_pointer;
	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
		capabilities_offset = dev->header->one.cap_pointer;
	}

	while (capabilities_offset > 0) {
		capabilities_hdr = (struct pci_capabilities_header *)
				   &config_space[capabilities_offset];
		switch (capabilities_hdr->capability_id) {
		case CAPABILITY_ID_PCI_EXPRESS:
			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
			break;

		case CAPABILITY_ID_MSI:
			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
			break;

		case CAPABILITY_ID_MSIX:
			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
			break;

		default:
			break;
		}
		capabilities_offset = capabilities_hdr->next;
	}
}

static volatile struct pci_enhanced_capability_header *
vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
{
	uint8_t *data;
	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;

	data = (uint8_t *)dev->header;
	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
		cap_hdr = (volatile struct pci_enhanced_capability_header *)&data[cap_offset];
		if (cap_hdr->capability_id == capability_id) {
			return cap_hdr;
		}
		cap_offset = cap_hdr->next;
		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
			break;
		}
	}

	return NULL;
}
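
/*
 * PCIe extended capabilities start at config offset 0x100 (which is what
 * EXTENDED_CAPABILITY_OFFSET is expected to be) and are chained through
 * each header's 'next' field. The walk above follows that chain and stops
 * on a zero or out-of-range offset, e.g. a hypothetical chain of
 * 0x100 -> 0x148 -> 0x2a0 -> 0.
 */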

static void
vmd_read_config_space(struct vmd_pci_device *dev)
{
	/*
	 * Writes to the pci config space are posted writes. To ensure the transaction
	 * reaches its destination before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
	{ uint16_t cmd = dev->header->common.command; (void)cmd; }

	vmd_get_device_capabilities(dev);
	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
			DEVICE_SERIAL_NUMBER_CAP_ID);
}

static void
vmd_update_scan_info(struct vmd_pci_device *dev)
{
	struct vmd_adapter *vmd_adapter = dev->bus->vmd;

	if (vmd_adapter->root_port_updated) {
		return;
	}

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	if (vmd_device_is_root_port(dev->header)) {
		vmd_adapter->root_port_updated = 1;
		SPDK_INFOLOG(vmd, "root_port_updated = %d\n",
			     vmd_adapter->root_port_updated);
		SPDK_INFOLOG(vmd, "upper:limit = %x : %x\n",
			     dev->header->one.prefetch_base_upper,
			     dev->header->one.prefetch_limit_upper);
		if (vmd_device_is_enumerated(dev->header)) {
			vmd_adapter->scan_completed = 1;
			SPDK_INFOLOG(vmd, "scan_completed = %d\n",
				     vmd_adapter->scan_completed);
		}
	}
}

static void
vmd_reset_base_limit_registers(volatile struct pci_header *header)
{
	uint32_t reg __attribute__((unused));

	/*
	 * Writes to the pci config space are posted writes.
	 * To ensure the transaction reaches its destination
	 * before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
	header->one.mem_base = 0xfff0;
	reg = header->one.mem_base;
	header->one.mem_limit = 0x0;
	reg = header->one.mem_limit;
	header->one.prefetch_base = 0x0;
	reg = header->one.prefetch_base;
	header->one.prefetch_limit = 0x0;
	reg = header->one.prefetch_limit;
	header->one.prefetch_base_upper = 0x0;
	reg = header->one.prefetch_base_upper;
	header->one.prefetch_limit_upper = 0x0;
	reg = header->one.prefetch_limit_upper;
	header->one.io_base_upper = 0x0;
	reg = header->one.io_base_upper;
	header->one.io_limit_upper = 0x0;
	reg = header->one.io_limit_upper;
	header->one.primary = 0;
	reg = header->one.primary;
	header->one.secondary = 0;
	reg = header->one.secondary;
	header->one.subordinate = 0;
	reg = header->one.subordinate;
}

static void
vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
{
	struct vmd_adapter *vmd = bus->vmd;
	struct vmd_hot_plug *hp = &dev->hp;
	size_t mem_id;

	dev->hotplug_capable = true;
	hp->bar.size = 1 << 20; /* 1 MB hotplug memory window */

	if (!vmd->scan_completed) {
		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
		bus->self->header->one.mem_limit =
			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
	} else {
		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
	}

	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);

	TAILQ_INIT(&hp->free_mem_queue);
	TAILQ_INIT(&hp->unused_mem_queue);
	TAILQ_INIT(&hp->alloc_mem_queue);

	hp->mem[0].size = hp->bar.size;
	hp->mem[0].addr = hp->bar.start;

	TAILQ_INSERT_TAIL(&hp->free_mem_queue, &hp->mem[0], tailq);

	for (mem_id = 1; mem_id < ADDR_ELEM_COUNT; ++mem_id) {
		TAILQ_INSERT_TAIL(&hp->unused_mem_queue, &hp->mem[mem_id], tailq);
	}

	SPDK_INFOLOG(vmd, "%s: mem_base:mem_limit = %x : %x\n", __func__,
		     bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
}

static bool
vmd_bus_device_present(struct vmd_pci_bus *bus, uint32_t devfn)
{
	volatile struct pci_header *header;

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
		return false;
	}

	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
		return false;
	}

	return true;
}

static struct vmd_pci_device *
vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
{
	struct vmd_pci_device *dev = NULL;
	volatile struct pci_header *header;
	uint8_t header_type;
	uint32_t rev_class;

	/* Make sure we're not creating two devices on the same dev/fn */
	TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
		if (dev->devfn == devfn) {
			return NULL;
		}
	}

	if (!vmd_bus_device_present(bus, devfn)) {
		return NULL;
	}

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));

	SPDK_INFOLOG(vmd, "PCI device found: %04x:%04x ***\n",
		     header->common.vendor_id, header->common.device_id);

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return NULL;
	}

	dev->header = header;
	dev->vid = dev->header->common.vendor_id;
	dev->did = dev->header->common.device_id;
	dev->bus = bus;
	dev->parent = bus;
	dev->devfn = devfn;
	header_type = dev->header->common.header_type;
	rev_class = dev->header->common.rev_class;
	dev->class = rev_class >> 8;
	dev->header_type = header_type & 0x7;

	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
		vmd_update_scan_info(dev);
		if (!dev->bus->vmd->scan_completed) {
			vmd_reset_base_limit_registers(dev->header);
		}
	}

	vmd_read_config_space(dev);

	return dev;
}

static struct vmd_pci_bus *
vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
{
	struct vmd_pci_bus *new_bus;

	new_bus = calloc(1, sizeof(*new_bus));
	if (!new_bus) {
		return NULL;
	}

	new_bus->parent = parent;
	new_bus->domain = parent->domain;
	new_bus->bus_number = bus_number;
	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
	new_bus->self = bridge;
	new_bus->vmd = parent->vmd;
	new_bus->config_bus_number = new_bus->bus_number - new_bus->vmd->vmd_bus.bus_start;
	TAILQ_INIT(&new_bus->dev_list);

	bridge->subordinate = new_bus;

	bridge->pci.addr.bus = new_bus->bus_number;
	bridge->pci.addr.dev = bridge->devfn;
	bridge->pci.addr.func = 0;
	bridge->pci.addr.domain = parent->vmd->pci->addr.domain;

	return new_bus;
}

/*
 * Assigns a bus number from the list of available
 * bus numbers. If the device is downstream of a hot plug port,
 * assign the bus number from those assigned to the HP port. Otherwise,
 * assign the next bus number from the vmd bus number list.
 */
static uint8_t
vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
{
	uint8_t bus = 0xff;
	struct vmd_pci_bus *hp_bus;

	if (dev) {
		hp_bus = vmd_is_dev_in_hotplug_path(dev);
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
		}
	}

	/* Device is not under a hot plug path. Return the next global bus number */
	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
		bus = vmd->next_bus_number;
		vmd->next_bus_number++;
	}
	return bus;
}

static uint8_t
vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
{
	uint8_t bus_number = 0xff;

	if (dev && dev->bus && dev->bus->vmd &&
	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
		bus_number = RESERVED_HOTPLUG_BUSES;
		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
	}

	return bus_number;
}

static void
vmd_enable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	/* Set Function Mask (bit 14), then MSI-X Enable (bit 15), then unmask.
	 * Each write is followed by a read-back to flush the posted write. */
	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
	control = dev->msix_cap->message_control.as_uint16_t;
	control = control & ~(1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

static void
vmd_disable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	/* Mask all vectors (bit 14) before clearing MSI-X Enable (bit 15) */
	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}
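
/*
 * For reference, the MSI-X Message Control register layout assumed above
 * (per the PCI specification): bit 15 = MSI-X Enable, bit 14 = Function
 * Mask, bits 10:0 = table size minus one.
 */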

/*
 * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for
 * the port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
 */
static void
vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
{
	int entry;

	if (!dev || !vmdEntry || !dev->msix_cap) {
		return;
	}

	vmd_disable_msix(dev);
	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
		return;
	}

	for (entry = 0; entry < dev->msix_table_size; ++entry) {
		dev->msix_table[entry].vector_control = 1;
	}
	vmd_enable_msix(dev);
}

static void
vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
{
	/* Update the subordinate bus of all bridges above this bridge */
	volatile struct vmd_pci_device *dev = bridge;
	uint8_t subordinate_bus;

	if (!dev) {
		return;
	}
	subordinate_bus = bridge->header->one.subordinate;
	while (dev->parent_bridge != NULL) {
		dev = dev->parent_bridge;
		if (dev->header->one.subordinate < subordinate_bus) {
			dev->header->one.subordinate = subordinate_bus;
			subordinate_bus = dev->header->one.subordinate;
		}
	}
}

static bool
vmd_is_supported_device(struct vmd_pci_device *dev)
{
	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
}

static int
vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);

	*size = dev->bar[bar].size;
	*phys_addr = dev->bar[bar].start;
	*mapped_addr = (void *)dev->bar[bar].vaddr;

	return 0;
}

static int
vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
{
	return 0;
}

static int
vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
		 uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *src = (volatile uint8_t *)dev->header;
	uint8_t *dst = value;
	size_t i;

	if (len + offset > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[i] = src[offset + i];
	}

	return 0;
}

static int
vmd_dev_cfg_write(struct spdk_pci_device *_dev, void *value,
		  uint32_t len, uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
	uint8_t *src = value;
	size_t i;

	if ((len + offset) > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[offset + i] = src[i];
	}

	return 0;
}

static void
vmd_dev_detach(struct spdk_pci_device *dev)
{
	struct vmd_pci_device *vmd_device = (struct vmd_pci_device *)dev;
	struct vmd_pci_device *bus_device = vmd_device->bus->self;
	struct vmd_pci_bus *bus = vmd_device->bus;
	size_t i, num_bars = vmd_device->header_type ? 2 : 6;

	spdk_pci_unhook_device(dev);
	TAILQ_REMOVE(&bus->dev_list, vmd_device, tailq);

	/* Release the hotplug region if the device is under a hotplug-capable bus */
	if (bus_device && bus_device->hotplug_capable) {
		for (i = 0; i < num_bars; ++i) {
			if (vmd_device->bar[i].start != 0) {
				vmd_hotplug_free_addr(&bus_device->hp, vmd_device->bar[i].start);
			}
		}
	}

	free(dev);
}

static void
vmd_dev_init(struct vmd_pci_device *dev)
{
	char bdf[32];

	dev->pci.addr.domain = dev->bus->vmd->domain;
	dev->pci.addr.bus = dev->bus->bus_number;
	dev->pci.addr.dev = dev->devfn;
	dev->pci.addr.func = 0;
	dev->pci.socket_id = spdk_pci_device_get_socket_id(dev->bus->vmd->pci);
	dev->pci.id.vendor_id = dev->header->common.vendor_id;
	dev->pci.id.device_id = dev->header->common.device_id;
	dev->pci.type = "vmd";
	dev->pci.map_bar = vmd_dev_map_bar;
	dev->pci.unmap_bar = vmd_dev_unmap_bar;
	dev->pci.cfg_read = vmd_dev_cfg_read;
	dev->pci.cfg_write = vmd_dev_cfg_write;
	dev->hotplug_capable = false;
	if (dev->pcie_cap != NULL) {
		dev->cached_slot_control = dev->pcie_cap->slot_control;
	}

	if (vmd_is_supported_device(dev)) {
		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
		SPDK_INFOLOG(vmd, "Initializing NVMe device at %s\n", bdf);
		dev->pci.parent = dev->bus->vmd->pci;
		spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
	}
}

/*
 * Scans a single bus for all devices attached and returns a count of how
 * many devices were found. In the VMD topology, it is assumed there are no
 * multi-function devices. Hence a bus (bridge) will not have a multi-function
 * device with both type 0 and type 1 headers.
 *
 * The other option for implementing this function is to make the bus an int
 * and create a new device PciBridge. PciBridge would inherit from PciDevice
 * with extra fields, sub/pri/sec bus. The input becomes PciPort, bus number
 * and parent_bridge.
 *
 * The bus number is scanned and if a device is found, based on the header_type,
 * either a PciBridge (type 1) or a PciDevice (type 0) is created.
 *
 * For a PciBridge, assign bus numbers and rescan the new bus. The PciBridge
 * currently being scanned becomes the passed-in parent_bridge with the new
 * bus number.
 *
 * The linked list becomes a list of PciBridges with PciDevices attached.
 *
 * Returns a count of how many devices were found (type 1 + type 0 header devices).
 */
static uint8_t
vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge)
{
	/* assuming only single function devices are on the bus */
	struct vmd_pci_device *new_dev;
	struct vmd_adapter *vmd;
	union express_slot_capabilities_register slot_cap;
	struct vmd_pci_bus *new_bus;
	uint8_t device_number, dev_cnt = 0;
	uint8_t new_bus_num;

	for (device_number = 0; device_number < 32; device_number++) {
		new_dev = vmd_alloc_dev(bus, device_number);
		if (new_dev == NULL) {
			continue;
		}

		dev_cnt++;
		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
			slot_cap.as_uint32_t = 0;
			if (new_dev->pcie_cap != NULL) {
				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
			}

			new_bus_num = vmd_get_next_bus_number(bus->vmd->is_hotplug_scan ? new_dev : NULL, bus->vmd);
			if (new_bus_num == 0xff) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
			if (!new_bus) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus->primary_bus = bus->secondary_bus;
			new_bus->self = new_dev;
			new_dev->bus_object = new_bus;

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
				new_bus->subordinate_bus += new_bus->hotplug_buses;

				/* Attach hot plug instance if HP is supported */
				/* Hot inserted SSDs can be assigned port bus of subordinate + 1 */
				SPDK_INFOLOG(vmd, "hotplug_capable/slot_implemented = "
					     "%x:%x\n", slot_cap.bit_field.hotplug_capable,
					     new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);
			}

			new_dev->parent_bridge = parent_bridge;
			new_dev->header->one.primary = new_bus->primary_bus;
			new_dev->header->one.secondary = new_bus->secondary_bus;
			new_dev->header->one.subordinate = new_bus->subordinate_bus;

			vmd_bus_update_bridge_info(new_dev);
			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);

			vmd_dev_init(new_dev);

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				vmd_init_hotplug(new_dev, new_bus);
			}

			dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
			if (new_dev->pcie_cap != NULL) {
				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
					return dev_cnt;
				}
			}
		} else {
			/* Attach the device to the current bus and assign base addresses */
			TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
			g_end_device_count++;
			if (vmd_assign_base_addrs(new_dev)) {
				vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
				vmd_dev_init(new_dev);
				if (vmd_is_supported_device(new_dev)) {
					vmd = bus->vmd;
					vmd->target[vmd->nvme_count] = new_dev;
					vmd->nvme_count++;
				}
			} else {
				SPDK_INFOLOG(vmd, "Removing failed device:%p\n", new_dev);
				TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
				free(new_dev);
				if (dev_cnt) {
					dev_cnt--;
				}
			}
		}
	}

	return dev_cnt;
}

static void
vmd_print_pci_info(struct vmd_pci_device *dev)
{
	if (!dev) {
		return;
	}

	if (dev->pcie_cap != NULL) {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
			     dev->header->common.vendor_id, dev->header->common.device_id,
			     dev->pcie_cap->express_cap_register.bit_field.device_type,
			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
	} else {
		SPDK_INFOLOG(vmd, "PCI DEVICE: [%04X:%04X]\n",
			     dev->header->common.vendor_id, dev->header->common.device_id);
	}

	SPDK_INFOLOG(vmd, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);

	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
		SPDK_INFOLOG(vmd, "\tbase addr: %x : %p\n",
			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
	}

	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
		SPDK_INFOLOG(vmd, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
			SPDK_INFOLOG(vmd, "\tSlot implemented on this device.\n");
			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
				SPDK_INFOLOG(vmd, "Device has HOT-PLUG capable slot.\n");
			}
		}
	}

	if (dev->sn_cap != NULL) {
		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;

		SPDK_INFOLOG(vmd, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
	}
}

static void
vmd_cache_scan_info(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	SPDK_INFOLOG(vmd, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
		     dev->header->common.device_id);

	if (vmd_device_is_root_port(dev->header)) {
		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
		reg = dev->header->one.prefetch_base_upper;
		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
		reg = dev->header->one.prefetch_limit_upper;

		SPDK_INFOLOG(vmd, "prefetch: %x:%x\n",
			     dev->header->one.prefetch_base_upper,
			     dev->header->one.prefetch_limit_upper);
	}
}

static void
vmd_reset_root_ports(struct vmd_pci_bus *bus)
{
	volatile struct pci_header *header;
	uint32_t devfn;

	/*
	 * The root ports might have been configured by some other driver (e.g. the Linux
	 * kernel) prior to loading the SPDK one, so we need to clear their configuration.
	 * This has to happen before the scanning process, as it's depth-first, so when
	 * scanning the initial root ports, the latter ones might still be using stale
	 * configuration. This can lead to two bridges having the same secondary/subordinate
	 * bus configuration, which, of course, isn't correct.
	 * (Note: this fixed issue #2413.)
	 */
	for (devfn = 0; devfn < 32; ++devfn) {
		if (!vmd_bus_device_present(bus, devfn)) {
			continue;
		}

		header = (volatile void *)(bus->vmd->cfg_vaddr +
					   CONFIG_OFFSET_ADDR(bus->config_bus_number, devfn, 0, 0));
		if (vmd_device_is_root_port(header) && !vmd_device_is_enumerated(header)) {
			vmd_reset_base_limit_registers(header);
		}
	}
}

static uint8_t
vmd_scan_pcibus(struct vmd_pci_bus *bus)
{
	struct vmd_pci_bus *bus_entry;
	struct vmd_pci_device *dev;
	uint8_t dev_cnt;

	vmd_reset_root_ports(bus);

	g_end_device_count = 0;
	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
	bus->vmd->next_bus_number = bus->bus_number + 1;
	dev_cnt = vmd_scan_single_bus(bus, NULL);

	SPDK_INFOLOG(vmd, "VMD scan found %u devices\n", dev_cnt);
	SPDK_INFOLOG(vmd, "VMD scan found %u END DEVICES\n", g_end_device_count);

	SPDK_INFOLOG(vmd, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
		     bus->vmd->pci->addr.domain, bus->vmd->pci->addr.bus,
		     bus->vmd->pci->addr.dev, bus->vmd->pci->addr.func);

	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
		if (bus_entry->self != NULL) {
			vmd_print_pci_info(bus_entry->self);
			vmd_cache_scan_info(bus_entry->self);
		}

		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
			vmd_print_pci_info(dev);
		}
	}

	return dev_cnt;
}

static int
vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
{
	int rc;

	rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
				     &vmd->cfgbar, &vmd->cfgbar_size);
	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
					     &vmd->membar, &vmd->membar_size);
	}

	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
					     &vmd->msixbar, &vmd->msixbar_size);
	}

	if (rc == 0) {
		vmd->physical_addr = vmd->membar;
		vmd->current_addr_size = vmd->membar_size;
	}
	return rc;
}

static void
vmd_set_starting_bus_number(struct vmd_adapter *vmd, uint8_t *bus_start,
			    uint8_t *max_bus)
{
	uint32_t vmd_cap = 0, vmd_config = 0;
	uint8_t bus_restrict_cap, bus_restrictions;

	spdk_pci_device_cfg_read32(vmd->pci, &vmd_cap, PCI_VMD_VMCAP);
	spdk_pci_device_cfg_read32(vmd->pci, &vmd_config, PCI_VMD_VMCONFIG);

	bus_restrict_cap = vmd_cap & 0x1; /* bit 0 */
	bus_restrictions = (vmd_config >> 8) & 0x3; /* bits 8-9 */
	if ((bus_restrict_cap == 0x1) && (bus_restrictions == 0x1)) {
		*bus_start = 128;
		*max_bus = 255;
	} else {
		*bus_start = 0;
		*max_bus = 127;
	}
}
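
/*
 * Example decode (hypothetical register values): VMCAP = 0x1 sets the bus
 * restriction capability bit, and VMCONFIG = 0x100 puts 01b in bits 8-9,
 * so the buses behind the VMD are numbered 128-255; any other combination
 * leaves the default 0-127 range.
 */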

static int
vmd_enumerate_devices(struct vmd_adapter *vmd)
{
	uint8_t max_bus, bus_start;

	vmd->vmd_bus.vmd = vmd;
	vmd->vmd_bus.domain = vmd->pci->addr.domain;

	if (vmd->pci->id.device_id == PCI_DEVICE_ID_INTEL_VMD_ICX) {
		vmd_set_starting_bus_number(vmd, &bus_start, &max_bus);
		vmd->vmd_bus.bus_start = bus_start;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = vmd->vmd_bus.bus_start;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = vmd->vmd_bus.bus_start;
		vmd->max_pci_bus = max_bus;
	} else {
		vmd->vmd_bus.bus_start = 0;
		vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
		vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
		vmd->max_pci_bus = PCI_MAX_BUS_NUMBER;
	}

	return vmd_scan_pcibus(&vmd->vmd_bus);
}

struct vmd_pci_device *
vmd_find_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;
	int i;

	for (i = 0; i < MAX_VMD_TARGET; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			if (bus->self) {
				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
					return bus->self;
				}
			}

			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
					return dev;
				}
			}
		}
	}

	return NULL;
}

static int
vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	uint32_t cmd_reg = 0;
	char bdf[32] = {0};
	struct vmd_container *vmd_c = ctx;
	size_t i;

	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x6;			/* PCI bus master/memory enable. */
	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);

	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
	SPDK_INFOLOG(vmd, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);

	/* map vmd bars */
	i = vmd_c->count;
	vmd_c->vmd[i].pci = pci_dev;
	vmd_c->vmd[i].vmd_index = i;
	/* Derive a unique domain for the devices behind this VMD from its own BDF */
	vmd_c->vmd[i].domain =
		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
	TAILQ_INIT(&vmd_c->vmd[i].bus_list);

	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
		return -1;
	}

	SPDK_INFOLOG(vmd, "vmd config bar(%p) vaddr(%p) size(%x)\n",
		     (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
		     (uint32_t)vmd_c->vmd[i].cfgbar_size);
	SPDK_INFOLOG(vmd, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
		     (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
		     (uint32_t)vmd_c->vmd[i].membar_size);
	SPDK_INFOLOG(vmd, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
		     (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
		     (uint32_t)vmd_c->vmd[i].msixbar_size);

	vmd_c->count = i + 1;

	vmd_enumerate_devices(&vmd_c->vmd[i]);

	return 0;
}

int
spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
{
	int cnt = 0;
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;

	if (!nvme_list) {
		return -1;
	}

	for (int i = 0; i < MAX_VMD_TARGET; ++i) {
		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci->addr) == 0) {
			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
					nvme_list[cnt++] = dev->pci;
					if (!dev->is_hooked) {
						vmd_dev_init(dev);
						dev->is_hooked = 1;
					}
				}
			}
		}
	}

	return cnt;
}

static void
vmd_clear_hotplug_status(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device = bus->self;
	uint16_t status __attribute__((unused));

	status = device->pcie_cap->slot_status.as_uint16_t;
	device->pcie_cap->slot_status.as_uint16_t = status;
	status = device->pcie_cap->slot_status.as_uint16_t;

	status = device->pcie_cap->link_status.as_uint16_t;
	device->pcie_cap->link_status.as_uint16_t = status;
	status = device->pcie_cap->link_status.as_uint16_t;
}
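
/*
 * The event bits in the PCIe slot and link status registers are RW1C
 * (write-1-to-clear), so reading each register and writing the same value
 * back clears whichever event bits were set; the final read flushes the
 * posted write.
 */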

static void
vmd_bus_handle_hotplug(struct vmd_pci_bus *bus)
{
	uint8_t num_devices, sleep_count;

	/* Wait up to 4 seconds (20 * 200 ms) for the hotplugged device to show up */
	for (sleep_count = 0; sleep_count < 20; ++sleep_count) {
		/* Scan until a new device is found */
		num_devices = vmd_scan_single_bus(bus, bus->self);
		if (num_devices > 0) {
			break;
		}

		spdk_delay_us(200000);
	}

	if (num_devices == 0) {
		SPDK_ERRLOG("Timed out while scanning for hotplugged devices\n");
	}
}

static void
vmd_bus_handle_hotremove(struct vmd_pci_bus *bus)
{
	struct vmd_pci_device *device, *tmpdev;

	TAILQ_FOREACH_SAFE(device, &bus->dev_list, tailq, tmpdev) {
		if (!vmd_bus_device_present(bus, device->devfn)) {
			device->pci.internal.pending_removal = true;

			/* If the device isn't attached, remove it immediately */
			if (!device->pci.internal.attached) {
				vmd_dev_detach(&device->pci);
			}
		}
	}
}

int
spdk_vmd_hotplug_monitor(void)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *device;
	int num_hotplugs = 0;
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			device = bus->self;
			if (device == NULL || !device->hotplug_capable) {
				continue;
			}

			if (device->pcie_cap->slot_status.bit_field.datalink_state_changed != 1) {
				continue;
			}

			if (device->pcie_cap->link_status.bit_field.datalink_layer_active == 1) {
				SPDK_INFOLOG(vmd, "Device hotplug detected on bus "
					     "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotplug(bus);
			} else {
				SPDK_INFOLOG(vmd, "Device hotremove detected on bus "
					     "%"PRIu32"\n", bus->bus_number);
				vmd_bus_handle_hotremove(bus);
			}

			vmd_clear_hotplug_status(bus);
			num_hotplugs++;
		}
	}

	return num_hotplugs;
}

int
spdk_vmd_init(void)
{
	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
}

void
spdk_vmd_fini(void)
{
	uint32_t i;

	for (i = 0; i < g_vmd_container.count; ++i) {
		spdk_pci_device_detach(g_vmd_container.vmd[i].pci);
	}
}
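
/*
 * Typical usage from an application (an illustrative sketch, not code from
 * this file): enumerate the VMD domains once at startup, then poll for
 * hotplug events, e.g. from a periodic poller.
 *
 *	if (spdk_vmd_init() != 0) {
 *		return -1;
 *	}
 *	...
 *	spdk_vmd_hotplug_monitor();
 *	...
 *	spdk_vmd_fini();
 */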

SPDK_LOG_REGISTER_COMPONENT(vmd)