/* xref: /spdk/lib/vmd/vmd.c (revision 9889ab2dc80e40dae92dcef361d53dcba722043d) */
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "vmd.h"

#include "spdk/stdinc.h"
#include "spdk/likely.h"

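/* Human-readable names for the device/port type field of the PCI Express
 * capabilities register; indexed by that field's value.
 */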
static const char *device_type[] = {
	"PCI Express Endpoint",
	"Legacy PCI Express Endpoint",
	"Reserved 1",
	"Reserved 2",
	"Root Port of PCI Express Root Complex",
	"Upstream Port of PCI Express Switch",
	"Downstream Port of PCI Express Switch",
	"PCI Express to PCI/PCI-X Bridge",
	"PCI/PCI-X to PCI Express Bridge",
	"Root Complex Integrated Endpoint",
	"Root Complex Event Collector",
	"Reserved Capability"
};

/*
 * Container for all VMD adapters probed in the system.
 */
struct vmd_container {
	uint32_t count;
	struct vmd_adapter vmd[MAX_VMD_SUPPORTED];
};

static struct vmd_container g_vmd_container;
static uint8_t g_end_device_count;

static bool
vmd_is_valid_cfg_addr(struct vmd_pci_bus *bus, uint64_t addr)
{
	return addr >= (uint64_t)bus->vmd->cfg_vaddr &&
	       addr < bus->vmd->cfgbar_size + (uint64_t)bus->vmd->cfg_vaddr;
}

static void
vmd_align_base_addrs(struct vmd_adapter *vmd, uint32_t alignment)
{
	uint32_t pad;

	/*
	 * Device is not in the hot plug path. Align the remaining base address
	 * from membar 1 to the requested boundary.
	 */
	if (vmd->physical_addr & (alignment - 1)) {
		pad = alignment - (vmd->physical_addr & (alignment - 1));
		vmd->physical_addr += pad;
		vmd->current_addr_size -= pad;
	}
}

static bool
vmd_device_is_enumerated(const struct vmd_pci_device *vmd_device)
{
	return vmd_device->header->one.prefetch_base_upper == VMD_UPPER_BASE_SIGNATURE &&
	       vmd_device->header->one.prefetch_limit_upper == VMD_UPPER_LIMIT_SIGNATURE;
}

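/* Intel (0x8086) root ports behind VMD; these device IDs cover the
 * platforms this driver recognizes.
 */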
static bool
vmd_device_is_root_port(const struct vmd_pci_device *vmd_device)
{
	return vmd_device->header->common.vendor_id == 0x8086 &&
	       (vmd_device->header->common.device_id == 0x2030 ||
		vmd_device->header->common.device_id == 0x2031 ||
		vmd_device->header->common.device_id == 0x2032 ||
		vmd_device->header->common.device_id == 0x2033);
}

/*
 *  Allocates an address from the vmd membar for the input memory size.
 *  vmd - vmd adapter object
 *  dev - vmd_pci_device to allocate a base address for.
 *  size - size of the memory window requested.
 *  Size must be a power of 2. Addresses are returned on the size boundary.
 *  Returns a physical address within the VMD membar window, or 0x0 if the window
 *  cannot be allocated. Consider increasing the size of the vmd membar if 0x0 is returned.
 */
static uint64_t
vmd_allocate_base_addr(struct vmd_adapter *vmd, struct vmd_pci_device *dev, uint32_t size)
{
	uint64_t base_address = 0;
	struct vmd_pci_bus *hp_bus;

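	/* ~size + 1 is -size in two's complement, so size & -size isolates the
	 * lowest set bit; it equals size only when size is a power of two. */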
	if (size && ((size & (~size + 1)) != size)) {
		return base_address;
	}

	/*
	 *  If the device is downstream of a hot plug port, allocate the address from the
	 *  range dedicated to the hot plug slot. Search the list of allocated addresses to
	 *  determine whether a free range exists that satisfies the request. If a free range
	 *  cannot be found, get a buffer from the unused chunk. A first-fit algorithm is used.
	 */
	if (dev) {
		hp_bus = vmd_is_dev_in_hotplug_path(dev);
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hp_allocate_base_addr(&hp_bus->self->hp, size);
		}
	}

	/* Ensure the physical membar allocation is size aligned */
	if (vmd->physical_addr & (size - 1)) {
		uint32_t pad = size - (vmd->physical_addr & (size - 1));
		vmd->physical_addr += pad;
		vmd->current_addr_size -= pad;
	}

	/* Allocate from the membar if enough memory is left */
	if (vmd->current_addr_size >= size) {
		base_address = vmd->physical_addr;
		vmd->physical_addr += size;
		vmd->current_addr_size -= size;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "allocated(size) %lx (%x)\n", base_address, size);

	return base_address;
}

static bool
vmd_is_end_device(struct vmd_pci_device *dev)
{
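	/* Mask off the multi-function bit; a type 0 header denotes an endpoint */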
	return (dev && dev->header) &&
	       ((dev->header->common.header_type & ~PCI_MULTI_FUNCTION) == PCI_HEADER_TYPE_NORMAL);
}

static void
vmd_update_base_limit_register(struct vmd_pci_device *dev, uint16_t base, uint16_t limit)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *bridge;

	if (base == 0 || limit == 0) {
		return;
	}

	if (dev->header->common.header_type == PCI_HEADER_TYPE_BRIDGE) {
		bus = dev->bus_object;
	} else {
		bus = dev->parent;
	}

	bridge = bus->self;
	SPDK_DEBUGLOG(SPDK_LOG_VMD, "base:limit = %x:%x\n", bridge->header->one.mem_base,
		      bridge->header->one.mem_limit);

	if (dev->bus->vmd->scan_completed) {
		return;
	}

	while (bus && bus->self != NULL) {
		bridge = bus->self;

		/* This is only for 32-bit memory space; needs revisiting to support 64-bit */
		if (bridge->header->one.mem_base > base) {
			bridge->header->one.mem_base = base;
			base = bridge->header->one.mem_base;
		}

		if (bridge->header->one.mem_limit < limit) {
			bridge->header->one.mem_limit = limit;
			limit = bridge->header->one.mem_limit;
		}

		bus = bus->parent;
	}
}

static uint64_t
vmd_get_base_addr(struct vmd_pci_device *dev, uint32_t index)
{
	struct vmd_pci_bus *bus = dev->parent;

	if (dev->header_type == PCI_HEADER_TYPE_BRIDGE) {
		return dev->header->zero.BAR[index] & ~0xf;
	} else {
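		/* The bridge's 16-bit mem_base register holds the upper bits of
		 * its 32-bit memory window base, so shift it back into place. */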
		return (uint64_t)bus->self->header->one.mem_base << 16;
	}
}

static bool
vmd_assign_base_addrs(struct vmd_pci_device *dev)
{
	uint16_t mem_base = 0, mem_limit = 0;
	unsigned char mem_attr = 0;
	int last;
	struct vmd_adapter *vmd = NULL;
	bool ret_val = false;
	uint32_t bar_value;
	uint32_t table_offset;

	if (dev && dev->bus) {
		vmd = dev->bus->vmd;
	}

	if (!vmd) {
		return false;
	}

	vmd_align_base_addrs(vmd, ONE_MB);

	last = dev->header_type ? 2 : 6;
	for (int i = 0; i < last; i++) {
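		/* Standard PCI BAR sizing: save the BAR, write all 1s, read back
		 * the resulting size mask, then restore the original value. */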
		bar_value = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = ~(0U);
		dev->bar[i].size = dev->header->zero.BAR[i];
		dev->header->zero.BAR[i] = bar_value;

		if (dev->bar[i].size == ~(0U) || dev->bar[i].size == 0 ||
		    dev->header->zero.BAR[i] & 1) {
			dev->bar[i].size = 0;
			continue;
		}
		mem_attr = dev->bar[i].size & PCI_BASE_ADDR_MASK;
		dev->bar[i].size = TWOS_COMPLEMENT(dev->bar[i].size & PCI_BASE_ADDR_MASK);

		if (vmd->scan_completed) {
			dev->bar[i].start = vmd_get_base_addr(dev, i);
		} else {
			dev->bar[i].start = vmd_allocate_base_addr(vmd, dev, dev->bar[i].size);
		}

		dev->header->zero.BAR[i] = (uint32_t)dev->bar[i].start;

		if (!dev->bar[i].start) {
			if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
				i++;
			}
			continue;
		}

		dev->bar[i].vaddr = ((uint64_t)vmd->mem_vaddr + (dev->bar[i].start - vmd->membar));
		mem_limit = BRIDGE_BASEREG(dev->header->zero.BAR[i]) +
			    BRIDGE_BASEREG(dev->bar[i].size - 1);
		if (!mem_base) {
			mem_base = BRIDGE_BASEREG(dev->header->zero.BAR[i]);
		}

		ret_val = true;

		if (mem_attr == (PCI_BAR_MEMORY_PREFETCH | PCI_BAR_MEMORY_TYPE_64)) {
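			/* A 64-bit prefetchable BAR spans two consecutive registers;
			 * write the upper 32 bits of the address into the next BAR. */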
			i++;
			if (i < last) {
				dev->header->zero.BAR[i] = (uint32_t)(dev->bar[i].start >> PCI_DWORD_SHIFT);
			}
		}
	}

	/* Enable device MEM and bus mastering */
	dev->header->zero.command |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
	/* Read the register back to flush the posted write; the value is unused */
	uint16_t cmd = dev->header->zero.command;
	(void)cmd;

	if (dev->msix_cap && ret_val) {
		table_offset = ((volatile struct pci_msix_cap *)dev->msix_cap)->msix_table_offset;
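		/* The low bits of the table offset register select the BAR that holds
		 * the MSI-X table (masked with 0x3 here); the remaining bits are the
		 * byte offset of the table within that BAR. */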
		if (dev->bar[table_offset & 0x3].vaddr) {
			dev->msix_table = (volatile struct pci_msix_table_entry *)
					  (dev->bar[table_offset & 0x3].vaddr + (table_offset & 0xfff8));
		}
	}

	if (ret_val && vmd_is_end_device(dev)) {
		vmd_update_base_limit_register(dev, mem_base, mem_limit);
	}

	return ret_val;
}

static void
vmd_get_device_capabilities(struct vmd_pci_device *dev)
{
	volatile uint8_t *config_space;
	uint8_t capabilities_offset;
	struct pci_capabilities_header *capabilities_hdr;

	config_space = (volatile uint8_t *)dev->header;
	if ((dev->header->common.status & PCI_CAPABILITIES_LIST) == 0) {
		return;
	}

	capabilities_offset = dev->header->zero.cap_pointer;
	if (dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
		capabilities_offset = dev->header->one.cap_pointer;
	}

	while (capabilities_offset > 0) {
		capabilities_hdr = (struct pci_capabilities_header *)
				   &config_space[capabilities_offset];
		switch (capabilities_hdr->capability_id) {
		case CAPABILITY_ID_PCI_EXPRESS:
			dev->pcie_cap = (volatile struct pci_express_cap *)(capabilities_hdr);
			break;

		case CAPABILITY_ID_MSI:
			dev->msi_cap = (volatile struct pci_msi_cap *)capabilities_hdr;
			break;

		case CAPABILITY_ID_MSIX:
			dev->msix_cap = (volatile struct pci_msix_capability *)capabilities_hdr;
			dev->msix_table_size = dev->msix_cap->message_control.bit.table_size + 1;
			break;

		default:
			break;
		}
		capabilities_offset = capabilities_hdr->next;
	}
}

static volatile struct pci_enhanced_capability_header *
vmd_get_enhanced_capabilities(struct vmd_pci_device *dev, uint16_t capability_id)
{
	uint8_t *data;
	uint16_t cap_offset = EXTENDED_CAPABILITY_OFFSET;
	volatile struct pci_enhanced_capability_header *cap_hdr = NULL;

	data = (uint8_t *)dev->header;
	while (cap_offset >= EXTENDED_CAPABILITY_OFFSET) {
		cap_hdr = (volatile struct pci_enhanced_capability_header *) &data[cap_offset];
		if (cap_hdr->capability_id == capability_id) {
			return cap_hdr;
		}
		cap_offset = cap_hdr->next;
		if (cap_offset == 0 || cap_offset < EXTENDED_CAPABILITY_OFFSET) {
			break;
		}
	}

	return NULL;
}

static void
vmd_read_config_space(struct vmd_pci_device *dev)
{
	/*
	 * Writes to the pci config space are posted writes. To ensure the transaction
	 * reaches its destination before another write is posted, an immediate read of
	 * the written value should be performed.
	 */
	dev->header->common.command |= (BUS_MASTER_ENABLE | MEMORY_SPACE_ENABLE);
	{ uint16_t cmd = dev->header->common.command; (void)cmd; }

	vmd_get_device_capabilities(dev);
	dev->sn_cap = (struct serial_number_capability *)vmd_get_enhanced_capabilities(dev,
			DEVICE_SERIAL_NUMBER_CAP_ID);
}

static void
vmd_update_scan_info(struct vmd_pci_device *dev)
{
	struct vmd_adapter *vmd_adapter = dev->bus->vmd;

	if (vmd_adapter->root_port_updated) {
		return;
	}

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	if (vmd_device_is_root_port(dev)) {
		vmd_adapter->root_port_updated = 1;
		SPDK_DEBUGLOG(SPDK_LOG_VMD, "root_port_updated = %d\n",
			      vmd_adapter->root_port_updated);
		SPDK_DEBUGLOG(SPDK_LOG_VMD, "upper:limit = %x : %x\n",
			      dev->header->one.prefetch_base_upper,
			      dev->header->one.prefetch_limit_upper);
		if (vmd_device_is_enumerated(dev)) {
			vmd_adapter->scan_completed = 1;
			SPDK_DEBUGLOG(SPDK_LOG_VMD, "scan_completed = %d\n",
				      vmd_adapter->scan_completed);
		}
	}
}

static void
vmd_reset_base_limit_registers(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	assert(dev->header_type != PCI_HEADER_TYPE_NORMAL);
	/*
	 * Writes to the pci config space are posted writes.
	 * To ensure the transaction reaches its destination
	 * before another write is posted, an immediate read
	 * of the written value should be performed.
	 */
	dev->header->one.mem_base = 0xfff0;
	reg = dev->header->one.mem_base;
	dev->header->one.mem_limit = 0x0;
	reg = dev->header->one.mem_limit;
	dev->header->one.prefetch_base = 0x0;
	reg = dev->header->one.prefetch_base;
	dev->header->one.prefetch_limit = 0x0;
	reg = dev->header->one.prefetch_limit;
	dev->header->one.prefetch_base_upper = 0x0;
	reg = dev->header->one.prefetch_base_upper;
	dev->header->one.prefetch_limit_upper = 0x0;
	reg = dev->header->one.prefetch_limit_upper;
	dev->header->one.io_base_upper = 0x0;
	reg = dev->header->one.io_base_upper;
	dev->header->one.io_limit_upper = 0x0;
	reg = dev->header->one.io_limit_upper;
	dev->header->one.primary = 0;
	reg = dev->header->one.primary;
	dev->header->one.secondary = 0;
	reg = dev->header->one.secondary;
	dev->header->one.subordinate = 0;
	reg = dev->header->one.subordinate;
}

static void
vmd_init_hotplug(struct vmd_pci_device *dev, struct vmd_pci_bus *bus)
{
	struct vmd_adapter *vmd = bus->vmd;
	struct vmd_hot_plug *hp = &dev->hp;

	dev->hotplug_capable = true;
	hp->bar.size = 1 << 20;

	if (!vmd->scan_completed) {
		hp->bar.start = vmd_allocate_base_addr(vmd, NULL, hp->bar.size);
		bus->self->header->one.mem_base = BRIDGE_BASEREG(hp->bar.start);
		bus->self->header->one.mem_limit =
			bus->self->header->one.mem_base + BRIDGE_BASEREG(hp->bar.size - 1);
	} else {
		hp->bar.start = (uint64_t)bus->self->header->one.mem_base << 16;
	}

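	/* Translate the physical membar address into its mapped virtual address */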
	hp->bar.vaddr = (uint64_t)vmd->mem_vaddr + (hp->bar.start - vmd->membar);

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "%s: mem_base:mem_limit = %x : %x\n", __func__,
		      bus->self->header->one.mem_base, bus->self->header->one.mem_limit);
}

static struct vmd_pci_device *
vmd_alloc_dev(struct vmd_pci_bus *bus, uint32_t devfn)
{
	struct vmd_pci_device *dev = NULL;
	volatile struct pci_header *header;
	uint8_t header_type;
	uint32_t rev_class;

	header = (volatile struct pci_header *)(bus->vmd->cfg_vaddr +
						CONFIG_OFFSET_ADDR(bus->bus_number, devfn, 0, 0));
	if (!vmd_is_valid_cfg_addr(bus, (uint64_t)header)) {
		return NULL;
	}

	if (header->common.vendor_id == PCI_INVALID_VENDORID || header->common.vendor_id == 0x0) {
		return NULL;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "PCI device found: %04x:%04x ***\n",
		      header->common.vendor_id, header->common.device_id);

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		return NULL;
	}

	dev->header = header;
	dev->vid = dev->header->common.vendor_id;
	dev->did = dev->header->common.device_id;
	dev->bus = bus;
	dev->parent = bus;
	dev->devfn = devfn;
	header_type = dev->header->common.header_type;
	rev_class = dev->header->common.rev_class;
	dev->class = rev_class >> 8;
	dev->header_type = header_type & 0x7;

	if (header_type == PCI_HEADER_TYPE_BRIDGE) {
		vmd_update_scan_info(dev);
		if (!dev->bus->vmd->scan_completed) {
			vmd_reset_base_limit_registers(dev);
		}
	}

	vmd_read_config_space(dev);

	return dev;
}

static struct vmd_pci_bus *
vmd_create_new_bus(struct vmd_pci_bus *parent, struct vmd_pci_device *bridge, uint8_t bus_number)
{
	struct vmd_pci_bus *new_bus;

	new_bus = calloc(1, sizeof(*new_bus));
	if (!new_bus) {
		return NULL;
	}

	new_bus->parent = parent;
	new_bus->domain = parent->domain;
	new_bus->bus_number = bus_number;
	new_bus->secondary_bus = new_bus->subordinate_bus = bus_number;
	new_bus->self = bridge;
	new_bus->vmd = parent->vmd;
	TAILQ_INIT(&new_bus->dev_list);

	bridge->subordinate = new_bus;

	bridge->pci.addr.bus = new_bus->bus_number;
	bridge->pci.addr.dev = bridge->devfn;
	bridge->pci.addr.func = 0;
	bridge->pci.addr.domain = parent->vmd->pci.addr.domain;

	return new_bus;
}

/*
 * Assigns a bus number from the list of available
 * bus numbers. If the device is downstream of a hot plug port,
 * assign the bus number from those assigned to the HP port. Otherwise,
 * assign the next bus number from the vmd bus number list.
 */
static uint8_t
vmd_get_next_bus_number(struct vmd_pci_device *dev, struct vmd_adapter *vmd)
{
	uint8_t bus = 0xff;
	struct vmd_pci_bus *hp_bus;

	if (dev) {
		hp_bus = vmd_is_dev_in_hotplug_path(dev);
		if (hp_bus && hp_bus->self && hp_bus->self->hotplug_capable) {
			return vmd_hp_get_next_bus_number(&hp_bus->self->hp);
		}
	}

	/* Device is not under a hot plug path. Return the next global bus number */
	if ((vmd->next_bus_number + 1) < vmd->max_pci_bus) {
		bus = vmd->next_bus_number;
		vmd->next_bus_number++;
	}
	return bus;
}

static uint8_t
vmd_get_hotplug_bus_numbers(struct vmd_pci_device *dev)
{
	uint8_t bus_number = 0xff;

	if (dev && dev->bus && dev->bus->vmd &&
	    ((dev->bus->vmd->next_bus_number + RESERVED_HOTPLUG_BUSES) < dev->bus->vmd->max_pci_bus)) {
		bus_number = RESERVED_HOTPLUG_BUSES;
		dev->bus->vmd->next_bus_number += RESERVED_HOTPLUG_BUSES;
	}

	return bus_number;
}

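/*
 * In the MSI-X message control word, bit 15 is the MSI-X enable bit and bit 14
 * is the function mask. The enable sequence below masks all vectors, sets the
 * enable bit, then clears the mask, reading the register back after each write
 * to flush the posted config write.
 */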
static void
vmd_enable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
	dev->msix_cap->message_control.as_uint16_t = (control | (1 << 15));
	control = dev->msix_cap->message_control.as_uint16_t;
	control = control & ~(1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

static void
vmd_disable_msix(struct vmd_pci_device *dev)
{
	volatile uint16_t control;

	control = dev->msix_cap->message_control.as_uint16_t | (1 << 14);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t & ~(1 << 15);
	dev->msix_cap->message_control.as_uint16_t = control;
	control = dev->msix_cap->message_control.as_uint16_t;
}

/*
 * Set up MSI-X table entries for the port. VMD MSI-X vector 0 is used for
 * the port interrupt, so vector 0 is mapped to all MSI-X entries for the port.
 */
static void
vmd_setup_msix(struct vmd_pci_device *dev, volatile struct pci_msix_table_entry *vmdEntry)
{
	int entry;

	if (!dev || !vmdEntry || !dev->msix_cap) {
		return;
	}

	vmd_disable_msix(dev);
	if (dev->msix_table == NULL || dev->msix_table_size > MAX_MSIX_TABLE_SIZE) {
		return;
	}

	for (entry = 0; entry < dev->msix_table_size; ++entry) {
		dev->msix_table[entry].vector_control = 1;
	}
	vmd_enable_msix(dev);
}

static void
vmd_bus_update_bridge_info(struct vmd_pci_device *bridge)
{
	/* Update the subordinate bus of all bridges above this bridge */
	volatile struct vmd_pci_device *dev = bridge;
	uint8_t subordinate_bus;

	if (!dev) {
		return;
	}
	subordinate_bus = bridge->header->one.subordinate;
	while (dev->parent_bridge != NULL) {
		dev = dev->parent_bridge;
		if (dev->header->one.subordinate < subordinate_bus) {
			dev->header->one.subordinate = subordinate_bus;
			subordinate_bus = dev->header->one.subordinate;
		}
	}
}

static bool
vmd_is_supported_device(struct vmd_pci_device *dev)
{
	return dev->class == PCI_CLASS_STORAGE_EXPRESS;
}

static int
vmd_dev_map_bar(struct spdk_pci_device *pci_dev, uint32_t bar,
		void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(pci_dev, struct vmd_pci_device, pci);

	*size = dev->bar[bar].size;
	*phys_addr = dev->bar[bar].start;
	*mapped_addr = (void *)dev->bar[bar].vaddr;

	return 0;
}

static int
vmd_dev_unmap_bar(struct spdk_pci_device *_dev, uint32_t bar, void *addr)
{
	return 0;
}

static int
vmd_dev_cfg_read(struct spdk_pci_device *_dev, void *value, uint32_t len,
		 uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *src = (volatile uint8_t *)dev->header;
	uint8_t *dst = value;
	size_t i;

	if (len + offset > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[i] = src[offset + i];
	}

	return 0;
}

static int
vmd_dev_cfg_write(struct spdk_pci_device *_dev, void *value,
		  uint32_t len, uint32_t offset)
{
	struct vmd_pci_device *dev = SPDK_CONTAINEROF(_dev, struct vmd_pci_device, pci);
	volatile uint8_t *dst = (volatile uint8_t *)dev->header;
	uint8_t *src = value;
	size_t i;

	if ((len + offset) > PCI_MAX_CFG_SIZE) {
		return -1;
	}

	for (i = 0; i < len; ++i) {
		dst[offset + i] = src[i];
	}

	return 0;
}

static void
vmd_dev_detach(struct spdk_pci_device *dev)
{
	return;
}

static void
vmd_dev_init(struct vmd_pci_device *dev)
{
	char bdf[32];

	dev->pci.addr.domain = dev->bus->vmd->domain;
	dev->pci.addr.bus = dev->bus->bus_number;
	dev->pci.addr.dev = dev->devfn;
	dev->pci.addr.func = 0;
	dev->pci.id.vendor_id = dev->header->common.vendor_id;
	dev->pci.id.device_id = dev->header->common.device_id;
	dev->pci.type = "vmd";
	dev->pci.map_bar = vmd_dev_map_bar;
	dev->pci.unmap_bar = vmd_dev_unmap_bar;
	dev->pci.cfg_read = vmd_dev_cfg_read;
	dev->pci.cfg_write = vmd_dev_cfg_write;
	dev->pci.detach = vmd_dev_detach;
	dev->cached_slot_control = dev->pcie_cap->slot_control;
	dev->hotplug_capable = false;

	if (vmd_is_supported_device(dev)) {
		spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->pci.addr);
		SPDK_DEBUGLOG(SPDK_LOG_VMD, "Initializing NVMe device at %s\n", bdf);
		dev->pci.parent = &dev->bus->vmd->pci;
		spdk_pci_hook_device(spdk_pci_nvme_get_driver(), &dev->pci);
	}
}

/*
 * Scans a single bus for all devices attached and returns a count of how
 * many devices were found. In the VMD topology, it is assumed there are no
 * multi-function devices. Hence a bus (bridge) will not mix functions with
 * both type 0 and type 1 headers.
 *
 * The other option for implementing this function is to make the bus an int and
 * create a new device PciBridge. PciBridge would inherit from PciDevice with extra
 * fields, sub/pri/sec bus. The input becomes PciPort, bus number and parent_bridge.
 *
 * The bus number is scanned and if a device is found, based on the header_type,
 * either a PciBridge (1) or a PciDevice (0) is created.
 *
 * If a PciBridge, assign bus numbers and rescan the new bus. The current PciBridge
 * being scanned becomes the passed-in parent_bridge with the new bus number.
 *
 * The linked list becomes a list of PciBridges with PciDevices attached.
 *
 * Returns a count of devices found (type 1 + type 0 header devices).
 */
static uint8_t
vmd_scan_single_bus(struct vmd_pci_bus *bus, struct vmd_pci_device *parent_bridge)
{
	/* assuming only single function devices are on the bus */
	struct vmd_pci_device *new_dev;
	struct vmd_adapter *vmd;
	union express_slot_capabilities_register slot_cap;
	struct vmd_pci_bus *new_bus;
	uint8_t device_number, dev_cnt = 0;
	uint8_t new_bus_num;

	for (device_number = 0; device_number < 32; device_number++) {
		new_dev = vmd_alloc_dev(bus, device_number);
		if (new_dev == NULL) {
			continue;
		}

		dev_cnt++;
		if (new_dev->header->common.header_type & PCI_HEADER_TYPE_BRIDGE) {
			slot_cap.as_uint32_t = 0;
			if (new_dev->pcie_cap != NULL) {
				slot_cap.as_uint32_t = new_dev->pcie_cap->slot_cap.as_uint32_t;
			}

			new_bus_num = vmd_get_next_bus_number(bus->vmd->is_hotplug_scan ? new_dev : NULL, bus->vmd);
			if (new_bus_num == 0xff) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus = vmd_create_new_bus(bus, new_dev, new_bus_num);
			if (!new_bus) {
				free(new_dev);
				return dev_cnt;
			}
			new_bus->primary_bus = bus->secondary_bus;
			new_bus->self = new_dev;
			new_dev->bus_object = new_bus;

			if (slot_cap.bit_field.hotplug_capable && new_dev->pcie_cap != NULL &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				new_bus->hotplug_buses = vmd_get_hotplug_bus_numbers(new_dev);
				new_bus->subordinate_bus += new_bus->hotplug_buses;
			}
			new_dev->parent_bridge = parent_bridge;
			new_dev->header->one.primary = new_bus->primary_bus;
			new_dev->header->one.secondary = new_bus->secondary_bus;
			new_dev->header->one.subordinate = new_bus->subordinate_bus;

			vmd_bus_update_bridge_info(new_dev);
			TAILQ_INSERT_TAIL(&bus->vmd->bus_list, new_bus, tailq);

			/* Attach hot plug instance if HP is supported */
			/* Hot inserted SSDs can be assigned a port bus of subordinate + 1 */
			SPDK_DEBUGLOG(SPDK_LOG_VMD, "bit_field.hotplug_capable:slot_implemented = %x:%x\n",
				      slot_cap.bit_field.hotplug_capable,
				      new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented);

			vmd_dev_init(new_dev);

			if (slot_cap.bit_field.hotplug_capable &&
			    new_dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
				vmd_init_hotplug(new_dev, new_bus);
			}

			dev_cnt += vmd_scan_single_bus(new_bus, new_dev);
			if (new_dev->pcie_cap != NULL) {
				if (new_dev->pcie_cap->express_cap_register.bit_field.device_type == SwitchUpstreamPort) {
					return dev_cnt;
				}
			}
		} else {
			/* Attach the device to the current bus and assign base addresses */
			TAILQ_INSERT_TAIL(&bus->dev_list, new_dev, tailq);
			g_end_device_count++;
			if (vmd_assign_base_addrs(new_dev)) {
				vmd_setup_msix(new_dev, &bus->vmd->msix_table[0]);
				vmd_dev_init(new_dev);
				if (vmd_is_supported_device(new_dev)) {
					vmd = bus->vmd;
					vmd->target[vmd->nvme_count] = new_dev;
					vmd->nvme_count++;
				}
			} else {
				SPDK_DEBUGLOG(SPDK_LOG_VMD, "Removing failed device:%p\n", new_dev);
				TAILQ_REMOVE(&bus->dev_list, new_dev, tailq);
				if (dev_cnt) {
					dev_cnt--;
				}
			}
		}
	}

	return dev_cnt;
}

static void
vmd_print_pci_info(struct vmd_pci_device *dev)
{
	if (!dev) {
		return;
	}

	if (dev->pcie_cap != NULL) {
		SPDK_INFOLOG(SPDK_LOG_VMD, "PCI DEVICE: [%04X:%04X] type(%x) : %s\n",
			     dev->header->common.vendor_id, dev->header->common.device_id,
			     dev->pcie_cap->express_cap_register.bit_field.device_type,
			     device_type[dev->pcie_cap->express_cap_register.bit_field.device_type]);
	} else {
		SPDK_INFOLOG(SPDK_LOG_VMD, "PCI DEVICE: [%04X:%04X]\n",
			     dev->header->common.vendor_id, dev->header->common.device_id);
	}

	SPDK_INFOLOG(SPDK_LOG_VMD, "\tDOMAIN:BDF: %04x:%02x:%02x:%x\n", dev->pci.addr.domain,
		     dev->pci.addr.bus, dev->pci.addr.dev, dev->pci.addr.func);

	if (!(dev->header_type & PCI_HEADER_TYPE_BRIDGE) && dev->bus) {
		SPDK_INFOLOG(SPDK_LOG_VMD, "\tbase addr: %x : %p\n",
			     dev->header->zero.BAR[0], (void *)dev->bar[0].vaddr);
	}

	if ((dev->header_type & PCI_HEADER_TYPE_BRIDGE)) {
		SPDK_INFOLOG(SPDK_LOG_VMD, "\tPrimary = %d, Secondary = %d, Subordinate = %d\n",
			     dev->header->one.primary, dev->header->one.secondary, dev->header->one.subordinate);
		if (dev->pcie_cap && dev->pcie_cap->express_cap_register.bit_field.slot_implemented) {
			SPDK_INFOLOG(SPDK_LOG_VMD, "\tSlot implemented on this device.\n");
			if (dev->pcie_cap->slot_cap.bit_field.hotplug_capable) {
				SPDK_INFOLOG(SPDK_LOG_VMD, "Device has HOT-PLUG capable slot.\n");
			}
		}
	}

	if (dev->sn_cap != NULL) {
		uint8_t *snLow = (uint8_t *)&dev->sn_cap->sn_low;
		uint8_t *snHi = (uint8_t *)&dev->sn_cap->sn_hi;

		SPDK_INFOLOG(SPDK_LOG_VMD, "\tSN: %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x\n",
			     snHi[3], snHi[2], snHi[1], snHi[0], snLow[3], snLow[2], snLow[1], snLow[0]);
	}
}

static void
vmd_cache_scan_info(struct vmd_pci_device *dev)
{
	uint32_t reg __attribute__((unused));

	if (dev->header_type == PCI_HEADER_TYPE_NORMAL) {
		return;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "vendor/device id:%x:%x\n", dev->header->common.vendor_id,
		      dev->header->common.device_id);

	if (vmd_device_is_root_port(dev)) {
		dev->header->one.prefetch_base_upper = VMD_UPPER_BASE_SIGNATURE;
		reg = dev->header->one.prefetch_base_upper;
		dev->header->one.prefetch_limit_upper = VMD_UPPER_LIMIT_SIGNATURE;
		reg = dev->header->one.prefetch_limit_upper;

		SPDK_DEBUGLOG(SPDK_LOG_VMD, "prefetch: %x:%x\n",
			      dev->header->one.prefetch_base_upper,
			      dev->header->one.prefetch_limit_upper);
	}
}

static uint8_t
vmd_scan_pcibus(struct vmd_pci_bus *bus)
{
	struct vmd_pci_bus *bus_entry;
	struct vmd_pci_device *dev;
	uint8_t dev_cnt;

	g_end_device_count = 0;
	TAILQ_INSERT_TAIL(&bus->vmd->bus_list, bus, tailq);
	bus->vmd->next_bus_number = bus->bus_number + 1;
	dev_cnt = vmd_scan_single_bus(bus, NULL);

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "VMD scan found %u devices\n", dev_cnt);
	SPDK_DEBUGLOG(SPDK_LOG_VMD, "VMD scan found %u END DEVICES\n", g_end_device_count);

	SPDK_INFOLOG(SPDK_LOG_VMD, "PCIe devices attached to VMD %04x:%02x:%02x:%x...\n",
		     bus->vmd->pci.addr.domain, bus->vmd->pci.addr.bus,
		     bus->vmd->pci.addr.dev, bus->vmd->pci.addr.func);

	TAILQ_FOREACH(bus_entry, &bus->vmd->bus_list, tailq) {
		if (bus_entry->self != NULL) {
			vmd_print_pci_info(bus_entry->self);
			vmd_cache_scan_info(bus_entry->self);
		}

		TAILQ_FOREACH(dev, &bus_entry->dev_list, tailq) {
			vmd_print_pci_info(dev);
		}
	}

	return dev_cnt;
}

static int
vmd_map_bars(struct vmd_adapter *vmd, struct spdk_pci_device *dev)
{
	int rc = spdk_pci_device_map_bar(dev, 0, (void **)&vmd->cfg_vaddr,
					 &vmd->cfgbar, &vmd->cfgbar_size);
	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 2, (void **)&vmd->mem_vaddr,
					     &vmd->membar, &vmd->membar_size);
	}

	if (rc == 0) {
		rc = spdk_pci_device_map_bar(dev, 4, (void **)&vmd->msix_vaddr,
					     &vmd->msixbar, &vmd->msixbar_size);
	}

	if (rc == 0) {
		vmd->physical_addr = vmd->membar;
		vmd->current_addr_size = vmd->membar_size;
	}
	return rc;
}

static int
vmd_enumerate_devices(struct vmd_adapter *vmd)
{
	vmd->vmd_bus.vmd = vmd;
	vmd->vmd_bus.secondary_bus = vmd->vmd_bus.subordinate_bus = 0;
	vmd->vmd_bus.primary_bus = vmd->vmd_bus.bus_number = 0;
	vmd->vmd_bus.domain = vmd->pci.addr.domain;

	return vmd_scan_pcibus(&vmd->vmd_bus);
}

struct vmd_pci_device *
vmd_find_device(const struct spdk_pci_addr *addr)
{
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;
	int i;

	for (i = 0; i < MAX_VMD_TARGET; ++i) {
		TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
			if (bus->self) {
				if (spdk_pci_addr_compare(&bus->self->pci.addr, addr) == 0) {
					return bus->self;
				}
			}

			TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
				if (spdk_pci_addr_compare(&dev->pci.addr, addr) == 0) {
					return dev;
				}
			}
		}
	}

	return NULL;
}

static int
vmd_enum_cb(void *ctx, struct spdk_pci_device *pci_dev)
{
	uint32_t cmd_reg = 0;
	char bdf[32] = {0};
	struct vmd_container *vmd_c = ctx;
	size_t i;

	spdk_pci_device_cfg_read32(pci_dev, &cmd_reg, 4);
	cmd_reg |= 0x6;                      /* PCI bus master/memory enable. */
	spdk_pci_device_cfg_write32(pci_dev, cmd_reg, 4);

	spdk_pci_addr_fmt(bdf, sizeof(bdf), &pci_dev->addr);
	SPDK_DEBUGLOG(SPDK_LOG_VMD, "Found a VMD[ %d ] at %s\n", vmd_c->count, bdf);

	/* map vmd bars */
	i = vmd_c->count;
	vmd_c->vmd[i].pci = *pci_dev;
	vmd_c->vmd[i].vmd_index = i;
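	/* Build a synthetic PCI domain for devices behind this VMD from the VMD
	 * endpoint's own bus/device/function, so their addresses cannot collide
	 * with devices in host-visible domains. */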
	vmd_c->vmd[i].domain =
		(pci_dev->addr.bus << 16) | (pci_dev->addr.dev << 8) | pci_dev->addr.func;
	vmd_c->vmd[i].max_pci_bus = PCI_MAX_BUS_NUMBER;
	TAILQ_INIT(&vmd_c->vmd[i].bus_list);

	if (vmd_map_bars(&vmd_c->vmd[i], pci_dev) == -1) {
		return -1;
	}

	SPDK_DEBUGLOG(SPDK_LOG_VMD, "vmd config bar(%p) vaddr(%p) size(%x)\n",
		      (void *)vmd_c->vmd[i].cfgbar, (void *)vmd_c->vmd[i].cfg_vaddr,
		      (uint32_t)vmd_c->vmd[i].cfgbar_size);
	SPDK_DEBUGLOG(SPDK_LOG_VMD, "vmd mem bar(%p) vaddr(%p) size(%x)\n",
		      (void *)vmd_c->vmd[i].membar, (void *)vmd_c->vmd[i].mem_vaddr,
		      (uint32_t)vmd_c->vmd[i].membar_size);
	SPDK_DEBUGLOG(SPDK_LOG_VMD, "vmd msix bar(%p) vaddr(%p) size(%x)\n\n",
		      (void *)vmd_c->vmd[i].msixbar, (void *)vmd_c->vmd[i].msix_vaddr,
		      (uint32_t)vmd_c->vmd[i].msixbar_size);

	vmd_c->count = i + 1;

	vmd_enumerate_devices(&vmd_c->vmd[i]);

	return 0;
}

int
spdk_vmd_pci_device_list(struct spdk_pci_addr vmd_addr, struct spdk_pci_device *nvme_list)
{
	int cnt = 0;
	struct vmd_pci_bus *bus;
	struct vmd_pci_device *dev;

	if (!nvme_list) {
		return -1;
	}

	for (int i = 0; i < MAX_VMD_TARGET; ++i) {
		if (spdk_pci_addr_compare(&vmd_addr, &g_vmd_container.vmd[i].pci.addr) == 0) {
			TAILQ_FOREACH(bus, &g_vmd_container.vmd[i].bus_list, tailq) {
				TAILQ_FOREACH(dev, &bus->dev_list, tailq) {
					nvme_list[cnt++] = dev->pci;
					if (!dev->is_hooked) {
						vmd_dev_init(dev);
						dev->is_hooked = 1;
					}
				}
			}
		}
	}

	return cnt;
}

int
spdk_vmd_init(void)
{
	return spdk_pci_enumerate(spdk_pci_vmd_get_driver(), vmd_enum_cb, &g_vmd_container);
}
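
/*
 * Minimal usage sketch (illustrative only, not part of this file; it assumes
 * the SPDK environment has already been initialized, the VMD address below is
 * hypothetical, and nvme_list is large enough for every NVMe device behind
 * the chosen VMD):
 *
 *	struct spdk_pci_addr vmd_addr;
 *	struct spdk_pci_device nvme_list[32];
 *	int cnt;
 *
 *	spdk_pci_addr_parse(&vmd_addr, "0000:5d:05.5");
 *	if (spdk_vmd_init() == 0) {
 *		cnt = spdk_vmd_pci_device_list(vmd_addr, nvme_list);
 *		// nvme_list[0..cnt-1] now describe NVMe devices behind the VMD
 *	}
 */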

SPDK_LOG_REGISTER_COMPONENT("vmd", SPDK_LOG_VMD)