xref: /dpdk/drivers/bus/vdev/vdev.c (revision 1fbb3977cb4cc95a88a383825b188398659883ea)
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 RehiveTech. All rights reserved.
 */

#include <string.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <sys/queue.h>

#include <rte_eal.h>
#include <dev_driver.h>
#include <bus_driver.h>
#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_memory.h>
#include <rte_tailq.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_errno.h>

#include "bus_vdev_driver.h"
#include "vdev_logs.h"
#include "vdev_private.h"

#define VDEV_MP_KEY	"bus_vdev_mp"

/* Forward declare to access virtual bus name */
static struct rte_bus rte_vdev_bus;


static TAILQ_HEAD(, rte_vdev_device) vdev_device_list =
	TAILQ_HEAD_INITIALIZER(vdev_device_list);
/* The lock needs to be recursive because a vdev can manage another vdev. */
static rte_spinlock_recursive_t vdev_device_list_lock =
	RTE_SPINLOCK_RECURSIVE_INITIALIZER;

static TAILQ_HEAD(, rte_vdev_driver) vdev_driver_list =
	TAILQ_HEAD_INITIALIZER(vdev_driver_list);

struct vdev_custom_scan {
	TAILQ_ENTRY(vdev_custom_scan) next;
	rte_vdev_scan_callback callback;
	void *user_arg;
};
TAILQ_HEAD(vdev_custom_scans, vdev_custom_scan);
static struct vdev_custom_scans vdev_custom_scans =
	TAILQ_HEAD_INITIALIZER(vdev_custom_scans);
static rte_spinlock_t vdev_custom_scan_lock = RTE_SPINLOCK_INITIALIZER;

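/*
 * Drivers normally do not call rte_vdev_register()/rte_vdev_unregister()
 * directly: the RTE_PMD_REGISTER_VDEV() macro emits a constructor that
 * registers the driver at load time. A minimal sketch (illustrative driver
 * and callback names, not taken from this file):
 *
 *     static struct rte_vdev_driver my_vdev_drv = {
 *         .probe = my_probe,
 *         .remove = my_remove,
 *     };
 *     RTE_PMD_REGISTER_VDEV(net_my_vdev, my_vdev_drv);
 */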
/* register a driver */
void
rte_vdev_register(struct rte_vdev_driver *driver)
{
	TAILQ_INSERT_TAIL(&vdev_driver_list, driver, next);
}

/* unregister a driver */
void
rte_vdev_unregister(struct rte_vdev_driver *driver)
{
	TAILQ_REMOVE(&vdev_driver_list, driver, next);
}

int
rte_vdev_add_custom_scan(rte_vdev_scan_callback callback, void *user_arg)
{
	struct vdev_custom_scan *custom_scan;

	rte_spinlock_lock(&vdev_custom_scan_lock);

	/* check if already registered */
	TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) {
		if (custom_scan->callback == callback &&
				custom_scan->user_arg == user_arg)
			break;
	}

	if (custom_scan == NULL) {
		custom_scan = malloc(sizeof(struct vdev_custom_scan));
		if (custom_scan != NULL) {
			custom_scan->callback = callback;
			custom_scan->user_arg = user_arg;
			TAILQ_INSERT_TAIL(&vdev_custom_scans, custom_scan, next);
		}
	}

	rte_spinlock_unlock(&vdev_custom_scan_lock);

	return (custom_scan == NULL) ? -1 : 0;
}

int
rte_vdev_remove_custom_scan(rte_vdev_scan_callback callback, void *user_arg)
{
	struct vdev_custom_scan *custom_scan, *tmp_scan;

	rte_spinlock_lock(&vdev_custom_scan_lock);
	RTE_TAILQ_FOREACH_SAFE(custom_scan, &vdev_custom_scans, next,
				tmp_scan) {
		if (custom_scan->callback != callback ||
				(custom_scan->user_arg != (void *)-1 &&
				custom_scan->user_arg != user_arg))
			continue;
		TAILQ_REMOVE(&vdev_custom_scans, custom_scan, next);
		free(custom_scan);
	}
	rte_spinlock_unlock(&vdev_custom_scan_lock);

	return 0;
}

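/*
 * Match a device name against the registered drivers, comparing the driver
 * name (or alias) as a prefix. On success the matching driver is returned
 * through @addr. Returns 0 when a driver is found, nonzero otherwise.
 * Also used as the bus ->parse() callback.
 */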
static int
vdev_parse(const char *name, void *addr)
{
	struct rte_vdev_driver **out = addr;
	struct rte_vdev_driver *driver = NULL;

	TAILQ_FOREACH(driver, &vdev_driver_list, next) {
		if (strncmp(driver->driver.name, name,
			    strlen(driver->driver.name)) == 0)
			break;
		if (driver->driver.alias &&
		    strncmp(driver->driver.alias, name,
			    strlen(driver->driver.alias)) == 0)
			break;
	}
	if (driver != NULL &&
	    addr != NULL)
		*out = driver;
	return driver == NULL;
}

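/*
 * Bus ->dma_map() callback: forward the request to the driver bound to the
 * device, if that driver implements dma_map(). Returns 1 when no driver is
 * attached.
 */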
static int
vdev_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
{
	struct rte_vdev_device *vdev = RTE_DEV_TO_VDEV(dev);
	const struct rte_vdev_driver *driver;

	if (!vdev) {
		rte_errno = EINVAL;
		return -1;
	}

	if (!vdev->device.driver) {
		VDEV_LOG(DEBUG, "no driver attached to device %s", dev->name);
		return 1;
	}

	driver = container_of(vdev->device.driver, const struct rte_vdev_driver,
			driver);

	if (driver->dma_map)
		return driver->dma_map(vdev, addr, iova, len);

	return 0;
}

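/*
 * Bus ->dma_unmap() callback: counterpart of vdev_dma_map() above.
 */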
static int
vdev_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len)
{
	struct rte_vdev_device *vdev = RTE_DEV_TO_VDEV(dev);
	const struct rte_vdev_driver *driver;

	if (!vdev) {
		rte_errno = EINVAL;
		return -1;
	}

	if (!vdev->device.driver) {
		VDEV_LOG(DEBUG, "no driver attached to device %s", dev->name);
		return 1;
	}

	driver = container_of(vdev->device.driver, const struct rte_vdev_driver,
			driver);

	if (driver->dma_unmap)
		return driver->dma_unmap(vdev, addr, iova, len);

	return 0;
}

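/*
 * Find the driver matching the device name and probe the device with it.
 * Returns -EEXIST if the device is already probed, a negative value when
 * no driver matches or the required IOVA mode is unavailable, otherwise
 * the return value of the driver's probe() callback.
 */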
static int
vdev_probe_all_drivers(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_vdev_driver *driver;
	enum rte_iova_mode iova_mode;
	int ret;

	if (rte_dev_is_probed(&dev->device))
		return -EEXIST;

	name = rte_vdev_device_name(dev);
	VDEV_LOG(DEBUG, "Search driver to probe device %s", name);

	if (vdev_parse(name, &driver))
		return -1;

	iova_mode = rte_eal_iova_mode();
	if ((driver->drv_flags & RTE_VDEV_DRV_NEED_IOVA_AS_VA) && (iova_mode == RTE_IOVA_PA)) {
		VDEV_LOG(ERR, "%s requires VA IOVA mode but current mode is PA, not initializing",
				name);
		return -1;
	}

	ret = driver->probe(dev);
	if (ret == 0)
		dev->device.driver = &driver->driver;
	return ret;
}

/* The caller is responsible for thread safety. */
static struct rte_vdev_device *
find_vdev(const char *name)
{
	struct rte_vdev_device *dev;

	if (!name)
		return NULL;

	TAILQ_FOREACH(dev, &vdev_device_list, next) {
		const char *devname = rte_vdev_device_name(dev);

		if (!strcmp(devname, name))
			return dev;
	}

	return NULL;
}

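/*
 * Allocate a devargs structure for a vdev from a name and an optional
 * argument string (used in the primary process).
 */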
static struct rte_devargs *
alloc_devargs(const char *name, const char *args)
{
	struct rte_devargs *devargs;
	int ret;

	devargs = calloc(1, sizeof(*devargs));
	if (!devargs)
		return NULL;

	devargs->bus = &rte_vdev_bus;
	if (args)
		devargs->data = strdup(args);
	else
		devargs->data = strdup("");
	if (devargs->data == NULL) {
		free(devargs);
		return NULL;
	}
	devargs->args = devargs->data;

	ret = strlcpy(devargs->name, name, sizeof(devargs->name));
	if (ret < 0 || ret >= (int)sizeof(devargs->name)) {
		rte_devargs_reset(devargs);
		free(devargs);
		return NULL;
	}

	return devargs;
}

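/*
 * Look up the "vdev" devargs entry whose name matches the given device name
 * (used instead of allocating new devargs in a secondary process).
 */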
static struct rte_devargs *
vdev_devargs_lookup(const char *name)
{
	struct rte_devargs *devargs;
	char dev_name[32];

	RTE_EAL_DEVARGS_FOREACH("vdev", devargs) {
		devargs->bus->parse(devargs->name, &dev_name);
		if (strcmp(dev_name, name) == 0) {
			VDEV_LOG(INFO, "devargs matched %s", dev_name);
			return devargs;
		}
	}
	return NULL;
}

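/*
 * Allocate a vdev device for @name, attach its devargs (allocated in the
 * primary process, looked up in a secondary) and append it to the vdev
 * device list. When @init is true the devargs are also recorded in the
 * global devargs list.
 */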
static int
insert_vdev(const char *name, const char *args,
		struct rte_vdev_device **p_dev,
		bool init)
{
	struct rte_vdev_device *dev;
	struct rte_devargs *devargs;
	int ret;

	if (name == NULL)
		return -EINVAL;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		devargs = alloc_devargs(name, args);
	else
		devargs = vdev_devargs_lookup(name);

	if (!devargs)
		return -ENOMEM;

	dev = calloc(1, sizeof(*dev));
	if (!dev) {
		ret = -ENOMEM;
		goto fail;
	}

	dev->device.bus = &rte_vdev_bus;
	dev->device.numa_node = SOCKET_ID_ANY;
	dev->device.name = devargs->name;

	if (find_vdev(name)) {
		/*
		 * A vdev is expected to have only one port.
		 * So there is no reason to try probing again,
		 * even with new arguments.
		 */
		ret = -EEXIST;
		goto fail;
	}

	if (init)
		rte_devargs_insert(&devargs);
	dev->device.devargs = devargs;
	TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);

	if (p_dev)
		*p_dev = dev;

	return 0;
fail:
	rte_devargs_reset(devargs);
	free(devargs);
	free(dev);
	return ret;
}

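/*
 * Create a virtual device from a name and argument string, then probe it
 * with the matching driver; on failure the device is removed again.
 * Illustrative call (hypothetical device name, NULL means no arguments):
 *
 *     rte_vdev_init("net_null0", NULL);
 */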
int
rte_vdev_init(const char *name, const char *args)
{
	struct rte_vdev_device *dev;
	int ret;

	rte_spinlock_recursive_lock(&vdev_device_list_lock);
	ret = insert_vdev(name, args, &dev, true);
	if (ret == 0) {
		ret = vdev_probe_all_drivers(dev);
		if (ret) {
			if (ret > 0)
				VDEV_LOG(ERR, "no driver found for %s", name);
			/* If probe fails, remove the device from the vdev list */
			TAILQ_REMOVE(&vdev_device_list, dev, next);
			rte_devargs_remove(dev->device.devargs);
			free(dev);
		}
	}
	rte_spinlock_recursive_unlock(&vdev_device_list_lock);
	return ret;
}

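/*
 * Invoke the remove() callback of the driver bound to the device.
 * Returns 1 when no driver is attached.
 */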
static int
vdev_remove_driver(struct rte_vdev_device *dev)
{
	const char *name = rte_vdev_device_name(dev);
	const struct rte_vdev_driver *driver;

	if (!dev->device.driver) {
		VDEV_LOG(DEBUG, "no driver attached to device %s", name);
		return 1;
	}

	driver = container_of(dev->device.driver, const struct rte_vdev_driver,
		driver);
	return driver->remove(dev);
}

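/*
 * Remove a virtual device: call its driver's remove() callback, drop it
 * from the vdev device list and release its devargs.
 */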
int
rte_vdev_uninit(const char *name)
{
	struct rte_vdev_device *dev;
	int ret;

	if (name == NULL)
		return -EINVAL;

	rte_spinlock_recursive_lock(&vdev_device_list_lock);

	dev = find_vdev(name);
	if (!dev) {
		ret = -ENOENT;
		goto unlock;
	}

	ret = vdev_remove_driver(dev);
	if (ret)
		goto unlock;

	TAILQ_REMOVE(&vdev_device_list, dev, next);
	rte_devargs_remove(dev->device.devargs);
	free(dev);

unlock:
	rte_spinlock_recursive_unlock(&vdev_device_list_lock);
	return ret;
}

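/*
 * Payload of the multi-process messages exchanged under VDEV_MP_KEY:
 * a message type, a device count and a device name.
 */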
struct vdev_param {
#define VDEV_SCAN_REQ	1
#define VDEV_SCAN_ONE	2
#define VDEV_SCAN_REP	3
	int type;
	int num;
	char name[RTE_DEV_NAME_MAX_LEN];
};

static int vdev_plug(struct rte_device *dev);

/**
 * This function acts as the IPC handler, in both the primary and secondary
 * process, for static vdev discovery when a secondary process boots.
 *
 * Step 1: the secondary process sends a sync request asking the primary
 *         for its vdevs;
 * Step 2: the primary process receives the request and sends its vdevs
 *         one by one;
 * Step 3: the primary process sends back a reply indicating how many
 *         vdevs were sent.
 */
static int
vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)
{
	struct rte_vdev_device *dev;
	struct rte_mp_msg mp_resp;
	struct vdev_param *ou = (struct vdev_param *)&mp_resp.param;
	const struct vdev_param *in = (const struct vdev_param *)mp_msg->param;
	const char *devname;
	int num;
	int ret;

	strlcpy(mp_resp.name, VDEV_MP_KEY, sizeof(mp_resp.name));
	mp_resp.len_param = sizeof(*ou);
	mp_resp.num_fds = 0;

	switch (in->type) {
	case VDEV_SCAN_REQ:
		ou->type = VDEV_SCAN_ONE;
		ou->num = 1;
		num = 0;

		rte_spinlock_recursive_lock(&vdev_device_list_lock);
		TAILQ_FOREACH(dev, &vdev_device_list, next) {
			devname = rte_vdev_device_name(dev);
			if (strlen(devname) == 0) {
				VDEV_LOG(INFO, "vdev with no name is not sent");
				continue;
			}
			VDEV_LOG(INFO, "send vdev, %s", devname);
			strlcpy(ou->name, devname, RTE_DEV_NAME_MAX_LEN);
			if (rte_mp_sendmsg(&mp_resp) < 0)
				VDEV_LOG(ERR, "send vdev, %s, failed, %s",
					 devname, strerror(rte_errno));
			num++;
		}
		rte_spinlock_recursive_unlock(&vdev_device_list_lock);

		ou->type = VDEV_SCAN_REP;
		ou->num = num;
		if (rte_mp_reply(&mp_resp, peer) < 0)
			VDEV_LOG(ERR, "Failed to reply to a scan request");
		break;
	case VDEV_SCAN_ONE:
		VDEV_LOG(INFO, "receive vdev, %s", in->name);
		ret = insert_vdev(in->name, NULL, NULL, false);
		if (ret == -EEXIST)
			VDEV_LOG(DEBUG, "device already exists, %s", in->name);
		else if (ret < 0)
			VDEV_LOG(ERR, "failed to add vdev, %s", in->name);
		break;
	default:
		VDEV_LOG(ERR, "vdev cannot recognize this message");
	}

	return 0;
}

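/*
 * Bus ->scan() callback: in a secondary process, first request the list of
 * vdevs from the primary over IPC; then run the registered custom scan
 * callbacks and create a device for every "vdev" devargs entry.
 */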
static int
vdev_scan(void)
{
	struct rte_vdev_device *dev;
	struct rte_devargs *devargs;
	struct vdev_custom_scan *custom_scan;

	if (rte_mp_action_register(VDEV_MP_KEY, vdev_action) < 0 &&
	    rte_errno != EEXIST) {
		/* for primary, unsupported IPC is not an error */
		if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
				rte_errno == ENOTSUP)
			goto scan;
		VDEV_LOG(ERR, "Failed to add vdev mp action");
		return -1;
	}

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		struct rte_mp_msg mp_req, *mp_rep;
		struct rte_mp_reply mp_reply;
		struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
		struct vdev_param *req = (struct vdev_param *)mp_req.param;
		struct vdev_param *resp;

		strlcpy(mp_req.name, VDEV_MP_KEY, sizeof(mp_req.name));
		mp_req.len_param = sizeof(*req);
		mp_req.num_fds = 0;
		req->type = VDEV_SCAN_REQ;
		if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
		    mp_reply.nb_received == 1) {
			mp_rep = &mp_reply.msgs[0];
			resp = (struct vdev_param *)mp_rep->param;
			VDEV_LOG(INFO, "Received %d vdevs", resp->num);
			free(mp_reply.msgs);
		} else
			VDEV_LOG(ERR, "Failed to request vdev from primary");

		/* Fall through to allow private vdevs in secondary process */
	}

scan:
	/* call custom scan callbacks if any */
	rte_spinlock_lock(&vdev_custom_scan_lock);
	TAILQ_FOREACH(custom_scan, &vdev_custom_scans, next) {
		if (custom_scan->callback != NULL)
			/*
			 * the callback should update devargs list
			 * by calling rte_devargs_insert() with
			 *     devargs.bus = rte_bus_find_by_name("vdev");
			 *     devargs.type = RTE_DEVTYPE_VIRTUAL;
			 *     devargs.policy = RTE_DEV_ALLOWED;
			 */
			custom_scan->callback(custom_scan->user_arg);
	}
	rte_spinlock_unlock(&vdev_custom_scan_lock);

	/* for virtual devices we scan the devargs_list populated via cmdline */
	RTE_EAL_DEVARGS_FOREACH("vdev", devargs) {

		dev = calloc(1, sizeof(*dev));
		if (!dev)
			return -1;

		rte_spinlock_recursive_lock(&vdev_device_list_lock);

		if (find_vdev(devargs->name)) {
			rte_spinlock_recursive_unlock(&vdev_device_list_lock);
			free(dev);
			continue;
		}

		dev->device.bus = &rte_vdev_bus;
		dev->device.devargs = devargs;
		dev->device.numa_node = SOCKET_ID_ANY;
		dev->device.name = devargs->name;

		TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);

		rte_spinlock_recursive_unlock(&vdev_device_list_lock);
	}

	return 0;
}

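/*
 * Bus ->probe() callback: probe every device in the vdev device list that
 * has not been probed yet.
 */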
static int
vdev_probe(void)
{
	struct rte_vdev_device *dev;
	int r, ret = 0;

	/* call the init function for each virtual device */
	TAILQ_FOREACH(dev, &vdev_device_list, next) {
		/* we don't use the vdev lock here, as it's only used in DPDK
		 * initialization; and we don't want to hold such a lock when
		 * we call each driver probe.
		 */

		r = vdev_probe_all_drivers(dev);
		if (r != 0) {
			if (r == -EEXIST)
				continue;
			VDEV_LOG(ERR, "failed to initialize %s device",
				rte_vdev_device_name(dev));
			ret = -1;
		}
	}

	return ret;
}

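/*
 * Bus ->cleanup() callback: call the remove() callback of each probed
 * device's driver and free all vdev devices.
 */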
static int
vdev_cleanup(void)
{
	struct rte_vdev_device *dev, *tmp_dev;
	int error = 0;

	RTE_TAILQ_FOREACH_SAFE(dev, &vdev_device_list, next, tmp_dev) {
		const struct rte_vdev_driver *drv;
		int ret = 0;

		if (dev->device.driver == NULL)
			goto free;

		drv = container_of(dev->device.driver, const struct rte_vdev_driver, driver);

		if (drv->remove == NULL)
			goto free;

		ret = drv->remove(dev);
		if (ret < 0)
			error = -1;

		dev->device.driver = NULL;
free:
		free(dev);
	}

	return error;
}

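/*
 * Walk the vdev device list, starting after @start (or from the head when
 * @start is NULL), and return the first device matching the comparison
 * callback.
 */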
struct rte_device *
rte_vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
		     const void *data)
{
	const struct rte_vdev_device *vstart;
	struct rte_vdev_device *dev;

	rte_spinlock_recursive_lock(&vdev_device_list_lock);
	if (start != NULL) {
		vstart = RTE_DEV_TO_VDEV_CONST(start);
		dev = TAILQ_NEXT(vstart, next);
	} else {
		dev = TAILQ_FIRST(&vdev_device_list);
	}
	while (dev != NULL) {
		if (cmp(&dev->device, data) == 0)
			break;
		dev = TAILQ_NEXT(dev, next);
	}
	rte_spinlock_recursive_unlock(&vdev_device_list_lock);

	return dev ? &dev->device : NULL;
}

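/*
 * Hotplug callbacks: plug probes an already scanned device,
 * unplug removes it by name.
 */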
static int
vdev_plug(struct rte_device *dev)
{
	return vdev_probe_all_drivers(RTE_DEV_TO_VDEV(dev));
}

static int
vdev_unplug(struct rte_device *dev)
{
	return rte_vdev_uninit(dev->name);
}

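/*
 * Report RTE_IOVA_VA if any scanned device matches a driver that requires
 * VA IOVA mode; RTE_IOVA_DC otherwise.
 */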
static enum rte_iova_mode
vdev_get_iommu_class(void)
{
	const char *name;
	struct rte_vdev_device *dev;
	struct rte_vdev_driver *driver;

	TAILQ_FOREACH(dev, &vdev_device_list, next) {
		name = rte_vdev_device_name(dev);
		if (vdev_parse(name, &driver))
			continue;

		if (driver->drv_flags & RTE_VDEV_DRV_NEED_IOVA_AS_VA)
			return RTE_IOVA_VA;
	}

	return RTE_IOVA_DC;
}

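/* Operations of the vdev bus, registered with the EAL below. */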
static struct rte_bus rte_vdev_bus = {
	.scan = vdev_scan,
	.probe = vdev_probe,
	.cleanup = vdev_cleanup,
	.find_device = rte_vdev_find_device,
	.plug = vdev_plug,
	.unplug = vdev_unplug,
	.parse = vdev_parse,
	.dma_map = vdev_dma_map,
	.dma_unmap = vdev_dma_unmap,
	.get_iommu_class = vdev_get_iommu_class,
	.dev_iterate = rte_vdev_dev_iterate,
};

RTE_REGISTER_BUS(vdev, rte_vdev_bus);
RTE_LOG_REGISTER_DEFAULT(vdev_logtype_bus, NOTICE);