xref: /spdk/lib/env_dpdk/pci.c (revision 45f7571a08ed25a4d3f9453f4a4ee245835feeb8)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "env_internal.h"
35 
36 #include "spdk/env.h"
37 
38 #define SYSFS_PCI_DRIVERS	"/sys/bus/pci/drivers"
39 
40 #define PCI_CFG_SIZE		256
41 #define PCI_EXT_CAP_ID_SN	0x03
42 
43 int
44 spdk_pci_device_init(struct rte_pci_driver *driver,
45 		     struct rte_pci_device *device)
46 {
47 	struct spdk_pci_enum_ctx *ctx = (struct spdk_pci_enum_ctx *)driver;
48 
49 	if (!ctx->cb_fn) {
50 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
51 		rte_pci_unmap_device(device);
52 #elif RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
53 		rte_eal_pci_unmap_device(device);
54 #endif
55 
56 		/* Return a positive value to indicate that this device does not belong to this driver, but
57 		 * this isn't an error. */
58 		return 1;
59 	}
60 
61 	if (device->kdrv == RTE_KDRV_VFIO) {
62 		/*
63 		 * TODO: This is a workaround for an issue where the device is not ready after VFIO reset.
64 		 * Figure out what is actually going wrong and remove this sleep.
65 		 */
66 		usleep(500 * 1000);
67 	}
68 
69 	return ctx->cb_fn(ctx->cb_arg, (struct spdk_pci_device *)device);
70 }
71 
/*
 * Teardown counterpart of spdk_pci_device_init().
 *
 * Performs no work; the device argument is ignored and 0 is always returned.
 */
int
spdk_pci_device_fini(struct rte_pci_device *device)
{
	(void)device;

	return 0;
}
77 
78 void
79 spdk_pci_device_detach(struct spdk_pci_device *device)
80 {
81 	struct rte_pci_addr	addr;
82 
83 	addr.domain = device->addr.domain;
84 	addr.bus = device->addr.bus;
85 	addr.devid = device->addr.devid;
86 	addr.function = device->addr.function;
87 
88 #if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
89 #if RTE_VERSION < RTE_VERSION_NUM(17, 05, 0, 0)
90 	rte_eal_device_remove(&device->device);
91 #endif
92 #endif
93 
94 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
95 	rte_pci_detach(&addr);
96 #else
97 	rte_eal_pci_detach(&addr);
98 #endif
99 }
100 
101 int
102 spdk_pci_device_attach(struct spdk_pci_enum_ctx *ctx,
103 		       spdk_pci_enum_cb enum_cb,
104 		       void *enum_ctx, struct spdk_pci_addr *pci_address)
105 {
106 	struct rte_pci_addr		addr;
107 
108 	addr.domain = pci_address->domain;
109 	addr.bus = pci_address->bus;
110 	addr.devid = pci_address->dev;
111 	addr.function = pci_address->func;
112 
113 	pthread_mutex_lock(&ctx->mtx);
114 
115 	if (!ctx->is_registered) {
116 		ctx->is_registered = true;
117 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
118 		rte_pci_register(&ctx->driver);
119 #else
120 		rte_eal_pci_register(&ctx->driver);
121 #endif
122 	}
123 
124 	ctx->cb_fn = enum_cb;
125 	ctx->cb_arg = enum_ctx;
126 
127 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
128 	if (rte_pci_probe_one(&addr) != 0) {
129 #else
130 	if (rte_eal_pci_probe_one(&addr) != 0) {
131 #endif
132 		ctx->cb_arg = NULL;
133 		ctx->cb_fn = NULL;
134 		pthread_mutex_unlock(&ctx->mtx);
135 		return -1;
136 	}
137 
138 	ctx->cb_arg = NULL;
139 	ctx->cb_fn = NULL;
140 	pthread_mutex_unlock(&ctx->mtx);
141 
142 	return 0;
143 }
144 
145 /* Note: You can call spdk_pci_enumerate from more than one thread
146  *       simultaneously safely, but you cannot call spdk_pci_enumerate
147  *       and rte_eal_pci_probe simultaneously.
148  */
149 int
150 spdk_pci_enumerate(struct spdk_pci_enum_ctx *ctx,
151 		   spdk_pci_enum_cb enum_cb,
152 		   void *enum_ctx)
153 {
154 	pthread_mutex_lock(&ctx->mtx);
155 
156 	if (!ctx->is_registered) {
157 		ctx->is_registered = true;
158 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
159 		rte_pci_register(&ctx->driver);
160 #else
161 		rte_eal_pci_register(&ctx->driver);
162 #endif
163 	}
164 
165 	ctx->cb_fn = enum_cb;
166 	ctx->cb_arg = enum_ctx;
167 
168 
169 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
170 	if (rte_pci_probe() != 0) {
171 #else
172 	if (rte_eal_pci_probe() != 0) {
173 #endif
174 		ctx->cb_arg = NULL;
175 		ctx->cb_fn = NULL;
176 		pthread_mutex_unlock(&ctx->mtx);
177 		return -1;
178 	}
179 
180 	ctx->cb_arg = NULL;
181 	ctx->cb_fn = NULL;
182 	pthread_mutex_unlock(&ctx->mtx);
183 
184 	return 0;
185 }
186 
187 struct spdk_pci_device *
188 spdk_pci_get_device(struct spdk_pci_addr *pci_addr)
189 {
190 	struct rte_pci_device	*dev;
191 	struct rte_pci_addr	addr;
192 	int			rc;
193 
194 	addr.domain = pci_addr->domain;
195 	addr.bus = pci_addr->bus;
196 	addr.devid = pci_addr->dev;
197 	addr.function = pci_addr->func;
198 
199 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 2)
200 	FOREACH_DEVICE_ON_PCIBUS(dev) {
201 #else
202 	TAILQ_FOREACH(dev, &pci_device_list, next) {
203 #endif
204 		rc = rte_eal_compare_pci_addr(&dev->addr, &addr);
205 		if (rc < 0) {
206 			continue;
207 		}
208 
209 		if (rc == 0) {
210 			return (struct spdk_pci_device *)dev;
211 		} else {
212 			break;
213 		}
214 	}
215 
216 	return NULL;
217 }
218 
219 int
220 spdk_pci_device_map_bar(struct spdk_pci_device *device, uint32_t bar,
221 			void **mapped_addr, uint64_t *phys_addr, uint64_t *size)
222 {
223 	struct rte_pci_device *dev = device;
224 
225 	*mapped_addr = dev->mem_resource[bar].addr;
226 	*phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr;
227 	*size = (uint64_t)dev->mem_resource[bar].len;
228 
229 	return 0;
230 }
231 
/*
 * Counterpart of spdk_pci_device_map_bar().
 *
 * Performs no work; all arguments are ignored and 0 is always returned.
 */
int
spdk_pci_device_unmap_bar(struct spdk_pci_device *device, uint32_t bar, void *addr)
{
	(void)device;
	(void)bar;
	(void)addr;

	return 0;
}
237 
238 uint32_t
239 spdk_pci_device_get_domain(struct spdk_pci_device *dev)
240 {
241 	return dev->addr.domain;
242 }
243 
244 uint8_t
245 spdk_pci_device_get_bus(struct spdk_pci_device *dev)
246 {
247 	return dev->addr.bus;
248 }
249 
250 uint8_t
251 spdk_pci_device_get_dev(struct spdk_pci_device *dev)
252 {
253 	return dev->addr.devid;
254 }
255 
256 uint8_t
257 spdk_pci_device_get_func(struct spdk_pci_device *dev)
258 {
259 	return dev->addr.function;
260 }
261 
262 uint16_t
263 spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev)
264 {
265 	return dev->id.vendor_id;
266 }
267 
268 uint16_t
269 spdk_pci_device_get_device_id(struct spdk_pci_device *dev)
270 {
271 	return dev->id.device_id;
272 }
273 
274 uint16_t
275 spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev)
276 {
277 	return dev->id.subsystem_vendor_id;
278 }
279 
280 uint16_t
281 spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev)
282 {
283 	return dev->id.subsystem_device_id;
284 }
285 
286 struct spdk_pci_id
287 spdk_pci_device_get_id(struct spdk_pci_device *pci_dev)
288 {
289 	struct spdk_pci_id pci_id;
290 
291 	pci_id.vendor_id = spdk_pci_device_get_vendor_id(pci_dev);
292 	pci_id.device_id = spdk_pci_device_get_device_id(pci_dev);
293 	pci_id.subvendor_id = spdk_pci_device_get_subvendor_id(pci_dev);
294 	pci_id.subdevice_id = spdk_pci_device_get_subdevice_id(pci_dev);
295 
296 	return pci_id;
297 }
298 
299 int
300 spdk_pci_device_get_socket_id(struct spdk_pci_device *pci_dev)
301 {
302 #if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
303 	return pci_dev->device.numa_node;
304 #else
305 	return pci_dev->numa_node;
306 #endif
307 }
308 
309 int
310 spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset)
311 {
312 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
313 	return rte_pci_read_config(dev, value, 1, offset) == 1 ? 0 : -1;
314 #else
315 	return rte_eal_pci_read_config(dev, value, 1, offset) == 1 ? 0 : -1;
316 #endif
317 }
318 
319 int
320 spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset)
321 {
322 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
323 	return rte_pci_write_config(dev, &value, 1, offset) == 1 ? 0 : -1;
324 #else
325 	return rte_eal_pci_write_config(dev, &value, 1, offset) == 1 ? 0 : -1;
326 #endif
327 }
328 
329 int
330 spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset)
331 {
332 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
333 	return rte_pci_read_config(dev, value, 2, offset) == 2 ? 0 : -1;
334 #else
335 	return rte_eal_pci_read_config(dev, value, 2, offset) == 2 ? 0 : -1;
336 #endif
337 }
338 
339 int
340 spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset)
341 {
342 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
343 	return rte_pci_write_config(dev, &value, 2, offset) == 2 ? 0 : -1;
344 #else
345 	return rte_eal_pci_write_config(dev, &value, 2, offset) == 2 ? 0 : -1;
346 #endif
347 }
348 
349 int
350 spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset)
351 {
352 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
353 	return rte_pci_read_config(dev, value, 4, offset) == 4 ? 0 : -1;
354 #else
355 	return rte_eal_pci_read_config(dev, value, 4, offset) == 4 ? 0 : -1;
356 #endif
357 }
358 
359 int
360 spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset)
361 {
362 #if RTE_VERSION >= RTE_VERSION_NUM(17, 05, 0, 4)
363 	return rte_pci_write_config(dev, &value, 4, offset) == 4 ? 0 : -1;
364 #else
365 	return rte_eal_pci_write_config(dev, &value, 4, offset) == 4 ? 0 : -1;
366 #endif
367 }
368 
369 int
370 spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len)
371 {
372 	int err;
373 	uint32_t pos, header = 0;
374 	uint32_t i, buf[2];
375 
376 	if (len < 17)
377 		return -1;
378 
379 	err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE);
380 	if (err || !header)
381 		return -1;
382 
383 	pos = PCI_CFG_SIZE;
384 	while (1) {
385 		if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) {
386 			if (pos) {
387 				/* skip the header */
388 				pos += 4;
389 				for (i = 0; i < 2; i++) {
390 					err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i);
391 					if (err)
392 						return -1;
393 				}
394 				snprintf(sn, len, "%08x%08x", buf[1], buf[0]);
395 				return 0;
396 			}
397 		}
398 		pos = (header >> 20) & 0xffc;
399 		/* 0 if no other items exist */
400 		if (pos < PCI_CFG_SIZE)
401 			return -1;
402 		err = spdk_pci_device_cfg_read32(dev, &header, pos);
403 		if (err)
404 			return -1;
405 	}
406 	return -1;
407 }
408 
409 struct spdk_pci_addr
410 spdk_pci_device_get_addr(struct spdk_pci_device *pci_dev)
411 {
412 	struct spdk_pci_addr pci_addr;
413 
414 	pci_addr.domain = spdk_pci_device_get_domain(pci_dev);
415 	pci_addr.bus = spdk_pci_device_get_bus(pci_dev);
416 	pci_addr.dev = spdk_pci_device_get_dev(pci_dev);
417 	pci_addr.func = spdk_pci_device_get_func(pci_dev);
418 
419 	return pci_addr;
420 }
421 
422 int
423 spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2)
424 {
425 	if (a1->domain > a2->domain) {
426 		return 1;
427 	} else if (a1->domain < a2->domain) {
428 		return -1;
429 	} else if (a1->bus > a2->bus) {
430 		return 1;
431 	} else if (a1->bus < a2->bus) {
432 		return -1;
433 	} else if (a1->dev > a2->dev) {
434 		return 1;
435 	} else if (a1->dev < a2->dev) {
436 		return -1;
437 	} else if (a1->func > a2->func) {
438 		return 1;
439 	} else if (a1->func < a2->func) {
440 		return -1;
441 	}
442 
443 	return 0;
444 }
445 
446 #ifdef __linux__
447 int
448 spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr)
449 {
450 	int dev_fd;
451 	char shm_name[64];
452 	int pid;
453 	void *dev_map;
454 	struct flock pcidev_lock = {
455 		.l_type = F_WRLCK,
456 		.l_whence = SEEK_SET,
457 		.l_start = 0,
458 		.l_len = 0,
459 	};
460 
461 	snprintf(shm_name, sizeof(shm_name), "%04x:%02x:%02x.%x", pci_addr->domain, pci_addr->bus,
462 		 pci_addr->dev, pci_addr->func);
463 
464 	dev_fd = shm_open(shm_name, O_RDWR | O_CREAT, 0600);
465 	if (dev_fd == -1) {
466 		fprintf(stderr, "could not shm_open %s\n", shm_name);
467 		return -1;
468 	}
469 
470 	if (ftruncate(dev_fd, sizeof(int)) != 0) {
471 		fprintf(stderr, "could not truncate shm %s\n", shm_name);
472 		close(dev_fd);
473 		return -1;
474 	}
475 
476 	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
477 		       MAP_SHARED, dev_fd, 0);
478 	if (dev_map == NULL) {
479 		fprintf(stderr, "could not mmap shm %s\n", shm_name);
480 		close(dev_fd);
481 		return -1;
482 	}
483 
484 	if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) {
485 		pid = *(int *)dev_map;
486 		fprintf(stderr, "Cannot create lock on device %s, probably"
487 			" process %d has claimed it\n", shm_name, pid);
488 		munmap(dev_map, sizeof(int));
489 		close(dev_fd);
490 		return -1;
491 	}
492 
493 	*(int *)dev_map = (int)getpid();
494 	munmap(dev_map, sizeof(int));
495 	/* Keep dev_fd open to maintain the lock. */
496 	return 0;
497 }
498 #endif /* __linux__ */
499 
500 #ifdef __FreeBSD__
/*
 * FreeBSD placeholder for claiming a PCI device.
 *
 * TODO: not implemented; the address is ignored and 0 is always returned.
 */
int
spdk_pci_device_claim(const struct spdk_pci_addr *pci_addr)
{
	(void)pci_addr;

	return 0;
}
507 #endif /* __FreeBSD__ */
508 
509 int
510 spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf)
511 {
512 	unsigned domain, bus, dev, func;
513 
514 	if (addr == NULL || bdf == NULL) {
515 		return -EINVAL;
516 	}
517 
518 	if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) ||
519 	    (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) {
520 		/* Matched a full address - all variables are initialized */
521 	} else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) {
522 		func = 0;
523 	} else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) ||
524 		   (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) {
525 		domain = 0;
526 	} else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) ||
527 		   (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) {
528 		domain = 0;
529 		func = 0;
530 	} else {
531 		return -EINVAL;
532 	}
533 
534 	if (bus > 0xFF || dev > 0x1F || func > 7) {
535 		return -EINVAL;
536 	}
537 
538 	addr->domain = domain;
539 	addr->bus = bus;
540 	addr->dev = dev;
541 	addr->func = func;
542 
543 	return 0;
544 }
545 
546 int
547 spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr)
548 {
549 	int rc;
550 
551 	rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x",
552 		      addr->domain, addr->bus,
553 		      addr->dev, addr->func);
554 
555 	if (rc > 0 && (size_t)rc < sz) {
556 		return 0;
557 	}
558 
559 	return -1;
560 }
561