xref: /spdk/lib/env_dpdk/pci.c (revision 3ef479ab163d96d6fd7f28b256d2a93ab42afd8e)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include "spdk/env.h"
37 
38 #define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"
39 
40 #define PCI_CFG_SIZE		256
41 #define PCI_EXT_CAP_ID_SN	0x03
42 
43 int
44 spdk_pci_device_init(struct rte_pci_driver *driver,
45 		     struct rte_pci_device *device)
46 {
47 	struct spdk_pci_enum_ctx *ctx = (struct spdk_pci_enum_ctx *)driver;
48 	int rc;
49 
50 	if (!ctx->cb_fn) {
51 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
52 		rte_pci_unmap_device(device);
53 #elif RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
54 		rte_eal_pci_unmap_device(device);
55 #endif
56 
57 		/* Return a positive value to indicate that this device does not belong to this driver, but
58 		 * this isn't an error. */
59 		return 1;
60 	}
61 
62 	if (device->kdrv == RTE_KDRV_VFIO) {
63 		/*
64 		 * TODO: This is a workaround for an issue where the device is not ready after VFIO reset.
65 		 * Figure out what is actually going wrong and remove this sleep.
66 		 */
67 		usleep(500 * 1000);
68 	}
69 
70 	rc = ctx->cb_fn(ctx->cb_arg, (struct spdk_pci_device *)device);
71 	if (rc != 0) {
72 		return rc;
73 	}
74 
75 	spdk_vtophys_get_ref();
76 	return 0;
77 }
78 
/*
 * Per-device removal hook: drops one vtophys reference and always reports
 * success. The device argument itself is not used.
 */
int
spdk_pci_device_fini(struct rte_pci_device *device)
{
	(void)device;

	spdk_vtophys_put_ref();

	return 0;
}
85 
86 void
87 spdk_pci_device_detach(struct spdk_pci_device *device)
88 {
89 #if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
90 #if RTE_VERSION < RTE_VERSION_NUM(17, 05, 0, 0)
91 	rte_eal_device_remove(&device->device);
92 #endif
93 #endif
94 
95 #if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3)
96 	struct spdk_pci_addr	addr;
97 	char			bdf[32];
98 
99 	addr.domain = device->addr.domain;
100 	addr.bus = device->addr.bus;
101 	addr.dev = device->addr.devid;
102 	addr.func = device->addr.function;
103 
104 	spdk_pci_addr_fmt(bdf, sizeof(bdf), &addr);
105 	if (rte_eal_dev_detach(&device->device) < 0) {
106 		fprintf(stderr, "Failed to detach PCI device %s (device already removed?).\n", bdf);
107 	}
108 #elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
109 	rte_pci_detach(&device->addr);
110 #else
111 	rte_eal_pci_detach(&device->addr);
112 #endif
113 }
114 
115 int
116 spdk_pci_device_attach(struct spdk_pci_enum_ctx *ctx,
117 		       spdk_pci_enum_cb enum_cb,
118 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
119 {
120 #if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3)
121 	char				bdf[32];
122 
123 	spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address);
124 #else
125 	struct rte_pci_addr		addr;
126 
127 	addr.domain = pci_address->domain;
128 	addr.bus = pci_address->bus;
129 	addr.devid = pci_address->dev;
130 	addr.function = pci_address->func;
131 #endif
132 
133 	pthread_mutex_lock(&ctx->mtx);
134 
135 	if (!ctx->is_registered) {
136 		ctx->is_registered = true;
137 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
138 		rte_pci_register(&ctx->driver);
139 #else
140 		rte_eal_pci_register(&ctx->driver);
141 #endif
142 	}
143 
144 	ctx->cb_fn = enum_cb;
145 	ctx->cb_arg = enum_ctx;
146 
147 #if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3)
148 	if (rte_eal_dev_attach(bdf, "") != 0) {
149 #elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
150 	if (rte_pci_probe_one(&addr) != 0) {
151 #else
152 	if (rte_eal_pci_probe_one(&addr) != 0) {
153 #endif
154 		ctx->cb_arg = NULL;
155 		ctx->cb_fn = NULL;
156 		pthread_mutex_unlock(&ctx->mtx);
157 		return -1;
158 	}
159 
160 	ctx->cb_arg = NULL;
161 	ctx->cb_fn = NULL;
162 	pthread_mutex_unlock(&ctx->mtx);
163 
164 	return 0;
165 }
166 
167 /* Note: You can call spdk_pci_enumerate from more than one thread
168  *       simultaneously safely, but you cannot call spdk_pci_enumerate
169  *       and rte_eal_pci_probe simultaneously.
170  */
171 int
172 spdk_pci_enumerate(struct spdk_pci_enum_ctx *ctx,
173 		   spdk_pci_enum_cb enum_cb,
174 		   void *enum_ctx)
175 {
176 	pthread_mutex_lock(&ctx->mtx);
177 
178 	if (!ctx->is_registered) {
179 		ctx->is_registered = true;
180 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
181 		rte_pci_register(&ctx->driver);
182 #else
183 		rte_eal_pci_register(&ctx->driver);
184 #endif
185 	}
186 
187 	ctx->cb_fn = enum_cb;
188 	ctx->cb_arg = enum_ctx;
189 
190 #if RTE_VERSION >= RTE_VERSION_NUM(17, 11, 0, 3)
191 	if (rte_bus_probe() != 0) {
192 #elif RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
193 	if (rte_pci_probe() != 0) {
194 #else
195 	if (rte_eal_pci_probe() != 0) {
196 #endif
197 		ctx->cb_arg = NULL;
198 		ctx->cb_fn = NULL;
199 		pthread_mutex_unlock(&ctx->mtx);
200 		return -1;
201 	}
202 
203 	ctx->cb_arg = NULL;
204 	ctx->cb_fn = NULL;
205 	pthread_mutex_unlock(&ctx->mtx);
206 
207 	return 0;
208 }
209 
210 struct spdk_pci_device *
211 spdk_pci_get_device(struct spdk_pci_addr *pci_addr)
212 {
213 	struct rte_pci_device	*dev;
214 	struct rte_pci_addr	addr;
215 	int			rc;
216 
217 	addr.domain = pci_addr->domain;
218 	addr.bus = pci_addr->bus;
219 	addr.devid = pci_addr->dev;
220 	addr.function = pci_addr->func;
221 
222 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 2)
223 	FOREACH_DEVICE_ON_PCIBUS(dev) {
224 #else
225 	TAILQ_FOREACH(dev, &pci_device_list, next) {
226 #endif
227 		rc = rte_eal_compare_pci_addr(&dev->addr, &addr);
228 		if (rc < 0) {
229 			continue;
230 		}
231 
232 		if (rc == 0) {
233 			return (struct spdk_pci_device *)dev;
234 		} else {
235 			break;
236 		}
237 	}
238 
239 	return NULL;
240 }
241 
242 int
243 spdk_pci_device_map_bar(struct spdk_pci_device *device, uint32_t bar,
244 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
245 {
246 	struct rte_pci_device *dev = device;
247 
248 	*mapped_addr = dev->mem_resource[bar].addr;
249 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
250 	*size = (uint64_t)dev->mem_resource[bar].len;
251 
252 	return 0;
253 }
254 
/*
 * Counterpart of spdk_pci_device_map_bar. This implementation does nothing
 * with its arguments and always returns 0.
 */
int
spdk_pci_device_unmap_bar(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
260 
261 uint32_t
262 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
263 {
264 	return dev->addr.domain;
265 }
266 
267 uint8_t
268 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
269 {
270 	return dev->addr.bus;
271 }
272 
273 uint8_t
274 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
275 {
276 	return dev->addr.devid;
277 }
278 
279 uint8_t
280 spdk_pci_device_get_func(struct spdk_pci_device *dev)
281 {
282 	return dev->addr.function;
283 }
284 
285 uint16_t
286 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
287 {
288 	return dev->id.vendor_id;
289 }
290 
291 uint16_t
292 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
293 {
294 	return dev->id.device_id;
295 }
296 
297 uint16_t
298 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
299 {
300 	return dev->id.subsystem_vendor_id;
301 }
302 
303 uint16_t
304 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
305 {
306 	return dev->id.subsystem_device_id;
307 }
308 
309 struct spdk_pci_id
310 spdk_pci_device_get_id(struct spdk_pci_device *pci_dev)
311 {
312 	struct spdk_pci_id pci_id;
313 
314 	pci_id.vendor_id = spdk_pci_device_get_vendor_id(pci_dev);
315 	pci_id.device_id = spdk_pci_device_get_device_id(pci_dev);
316 	pci_id.subvendor_id = spdk_pci_device_get_subvendor_id(pci_dev);
317 	pci_id.subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev);
318 
319 	return pci_id;
320 }
321 
322 int
323 spdk_pci_device_get_socket_id(struct spdk_pci_device *pci_dev)
324 {
325 #if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
326 	return pci_dev->device.numa_node;
327 #else
328 	return pci_dev->numa_node;
329 #endif
330 }
331 
332 int
333 spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
334 {
335 	int rc;
336 
337 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
338 	rc = rte_pci_read_config(dev, value, len, offset);
339 #else
340 	rc = rte_eal_pci_read_config(dev, value, len, offset);
341 #endif
342 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
343 }
344 
345 int
346 spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset)
347 {
348 	int rc;
349 
350 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
351 	rc = rte_pci_write_config(dev, value, len, offset);
352 #else
353 	rc = rte_eal_pci_write_config(dev, value, len, offset);
354 #endif
355 	return (rc > 0 && (uint32_t) rc == len) ? 0 : -1;
356 }
357 
/* Read one byte of configuration space at offset into *value; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
363 
/* Write the one-byte value to configuration space at offset; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
369 
/* Read two bytes of configuration space at offset into *value; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
375 
/* Write the two-byte value to configuration space at offset; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
381 
/* Read four bytes of configuration space at offset into *value; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
{
	return spdk_pci_device_cfg_read(dev, value, sizeof(*value), offset);
}
387 
/* Write the four-byte value to configuration space at offset; 0 on success, -1 on failure. */
int
spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
{
	return spdk_pci_device_cfg_write(dev, &value, sizeof(value), offset);
}
393 
394 int
395 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
396 {
397 	int err;
398 	uint32_t pos, header = 0;
399 	uint32_t i, buf[2];
400 
401 	if (len < 17) {
402 		return -1;
403 	}
404 
405 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
406 	if (err || !header) {
407 		return -1;
408 	}
409 
410 	pos = PCI_CFG_SIZE;
411 	while (1) {
412 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
413 			if (pos) {
414 				/* skip the header */
415 				pos += 4;
416 				for (i = 0; i < 2; i++) {
417 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
418 					if (err) {
419 						return -1;
420 					}
421 				}
422 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
423 				return 0;
424 			}
425 		}
426 		pos = (header >> 20) & 0xffc;
427 		/* 0 if no other items exist */
428 		if (pos < PCI_CFG_SIZE) {
429 			return -1;
430 		}
431 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
432 		if (err) {
433 			return -1;
434 		}
435 	}
436 	return -1;
437 }
438 
439 struct spdk_pci_addr
440 spdk_pci_device_get_addr(struct spdk_pci_device *pci_dev)
441 {
442 	struct spdk_pci_addr pci_addr;
443 
444 	pci_addr.domain = spdk_pci_device_get_domain(pci_dev);
445 	pci_addr.bus = spdk_pci_device_get_bus(pci_dev);
446 	pci_addr.dev = spdk_pci_device_get_dev(pci_dev);
447 	pci_addr.func = spdk_pci_device_get_func(pci_dev);
448 
449 	return pci_addr;
450 }
451 
452 int
453 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
454 {
455 	if (a1->domain > a2->domain) {
456 		return 1;
457 	} else if (a1->domain < a2->domain) {
458 		return -1;
459 	} else if (a1->bus > a2->bus) {
460 		return 1;
461 	} else if (a1->bus < a2->bus) {
462 		return -1;
463 	} else if (a1->dev > a2->dev) {
464 		return 1;
465 	} else if (a1->dev < a2->dev) {
466 		return -1;
467 	} else if (a1->func > a2->func) {
468 		return 1;
469 	} else if (a1->func < a2->func) {
470 		return -1;
471 	}
472 
473 	return 0;
474 }
475 
476 #ifdef __linux__
477 int
478 spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr)
479 {
480 	int dev_fd;
481 	char dev_name[64];
482 	int pid;
483 	void *dev_map;
484 	struct flock pcidev_lock = {
485 		.l_type = F_WRLCK,
486 		.l_whence = SEEK_SET,
487 		.l_start = 0,
488 		.l_len = 0,
489 	};
490 
491 	snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", pci_addr->domain,
492 		 pci_addr->bus,
493 		 pci_addr->dev, pci_addr->func);
494 
495 	dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
496 	if (dev_fd == -1) {
497 		fprintf(stderr, "could not open %s\n", dev_name);
498 		return -1;
499 	}
500 
501 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
502 		fprintf(stderr, "could not truncate %s\n", dev_name);
503 		close(dev_fd);
504 		return -1;
505 	}
506 
507 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
508 		       MAP_SHARED, dev_fd, 0);
509 	if (dev_map == NULL) {
510 		fprintf(stderr, "could not mmap dev %s\n", dev_name);
511 		close(dev_fd);
512 		return -1;
513 	}
514 
515 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
516 		pid = *(int *)dev_map;
517 		fprintf(stderr, "Cannot create lock on device %s, probably"
518 			" process %d has claimed it\n", dev_name, pid);
519 		munmap(dev_map, sizeof(int));
520 		close(dev_fd);
521 		return -1;
522 	}
523 
524 	*(int *)dev_map = (int)getpid();
525 	munmap(dev_map, sizeof(int));
526 	/* Keep dev_fd open to maintain the lock. */
527 	return dev_fd;
528 }
529 #endif /* __linux__ */
530 
531 #ifdef __FreeBSD__
/*
 * FreeBSD placeholder for device claiming: no lock is taken and 0 is always
 * returned.
 */
int
spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr)
{
	(void)pci_addr;

	/* TODO */
	return 0;
}
538 #endif /* __FreeBSD__ */
539 
540 int
541 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
542 {
543 	unsigned domain, bus, dev, func;
544 
545 	if (addr == NULL || bdf == NULL) {
546 		return -EINVAL;
547 	}
548 
549 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
550 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
551 		/* Matched a full address - all variables are initialized */
552 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
553 		func = 0;
554 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
555 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
556 		domain = 0;
557 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
558 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
559 		domain = 0;
560 		func = 0;
561 	} else {
562 		return -EINVAL;
563 	}
564 
565 	if (bus > 0xFF || dev > 0x1F || func > 7) {
566 		return -EINVAL;
567 	}
568 
569 	addr->domain = domain;
570 	addr->bus = bus;
571 	addr->dev = dev;
572 	addr->func = func;
573 
574 	return 0;
575 }
576 
577 int
578 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
579 {
580 	int rc;
581 
582 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
583 		      addr->domain, addr->bus,
584 		      addr->dev, addr->func);
585 
586 	if (rc > 0 && (size_t)rc < sz) {
587 		return 0;
588 	}
589 
590 	return -1;
591 }
592