xref: /dpdk/lib/eal/linux/eal_dev.c (revision 8f1d23ece06adff5eae9f1b4365bdbbd3abee2b2)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <signal.h>
9 #include <sys/socket.h>
10 #include <linux/netlink.h>
11 
12 #include <rte_string_fns.h>
13 #include <rte_log.h>
14 #include <rte_dev.h>
15 #include <rte_interrupts.h>
16 #include <rte_alarm.h>
17 #include <rte_bus.h>
18 #include <rte_spinlock.h>
19 #include <rte_errno.h>
20 
21 #include "eal_private.h"
22 
23 static struct rte_intr_handle *intr_handle;
24 static rte_rwlock_t monitor_lock = RTE_RWLOCK_INITIALIZER;
25 static uint32_t monitor_refcount;
26 static bool hotplug_handle;
27 
28 #define EAL_UEV_MSG_LEN 4096
29 #define EAL_UEV_MSG_ELEM_LEN 128
30 
31 /*
32  * spinlock for device hot-unplug failure handling. If it try to access bus or
33  * device, such as handle sigbus on bus or handle memory failure for device
34  * just need to use this lock. It could protect the bus and the device to avoid
35  * race condition.
36  */
37 static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;
38 
39 static struct sigaction sigbus_action_old;
40 
41 static int sigbus_need_recover;
42 
43 static void dev_uev_handler(__rte_unused void *param);
44 
/* Identifies the system layer that reported a device event. */
enum eal_dev_event_subsystem {
	/* PCI bus device event */
	EAL_DEV_EVENT_SUBSYSTEM_PCI,
	/* UIO driver device event */
	EAL_DEV_EVENT_SUBSYSTEM_UIO,
	/* VFIO driver device event */
	EAL_DEV_EVENT_SUBSYSTEM_VFIO,
	/* last enumerator: count of the subsystems listed above */
	EAL_DEV_EVENT_SUBSYSTEM_MAX
};
52 
53 static void
54 sigbus_action_recover(void)
55 {
56 	if (sigbus_need_recover) {
57 		sigaction(SIGBUS, &sigbus_action_old, NULL);
58 		sigbus_need_recover = 0;
59 	}
60 }
61 
62 static void sigbus_handler(int signum, siginfo_t *info,
63 				void *ctx __rte_unused)
64 {
65 	int ret;
66 
67 	RTE_LOG(DEBUG, EAL, "Thread catch SIGBUS, fault address:%p\n",
68 		info->si_addr);
69 
70 	rte_spinlock_lock(&failure_handle_lock);
71 	ret = rte_bus_sigbus_handler(info->si_addr);
72 	rte_spinlock_unlock(&failure_handle_lock);
73 	if (ret == -1) {
74 		rte_exit(EXIT_FAILURE,
75 			 "Failed to handle SIGBUS for hot-unplug, "
76 			 "(rte_errno: %s)!", strerror(rte_errno));
77 	} else if (ret == 1) {
78 		if (sigbus_action_old.sa_flags == SA_SIGINFO
79 		    && sigbus_action_old.sa_sigaction) {
80 			(*(sigbus_action_old.sa_sigaction))(signum,
81 							    info, ctx);
82 		} else if (sigbus_action_old.sa_flags != SA_SIGINFO
83 			   && sigbus_action_old.sa_handler) {
84 			(*(sigbus_action_old.sa_handler))(signum);
85 		} else {
86 			rte_exit(EXIT_FAILURE,
87 				 "Failed to handle generic SIGBUS!");
88 		}
89 	}
90 
91 	RTE_LOG(DEBUG, EAL, "Success to handle SIGBUS for hot-unplug!\n");
92 }
93 
94 static int cmp_dev_name(const struct rte_device *dev,
95 	const void *_name)
96 {
97 	const char *name = _name;
98 
99 	return strcmp(dev->name, name);
100 }
101 
102 static int
103 dev_uev_socket_fd_create(void)
104 {
105 	struct sockaddr_nl addr;
106 	int ret, fd;
107 
108 	fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
109 		    NETLINK_KOBJECT_UEVENT);
110 	if (fd < 0) {
111 		RTE_LOG(ERR, EAL, "create uevent fd failed.\n");
112 		return -1;
113 	}
114 
115 	memset(&addr, 0, sizeof(addr));
116 	addr.nl_family = AF_NETLINK;
117 	addr.nl_pid = 0;
118 	addr.nl_groups = 0xffffffff;
119 
120 	ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
121 	if (ret < 0) {
122 		RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n");
123 		goto err;
124 	}
125 
126 	if (rte_intr_fd_set(intr_handle, fd))
127 		goto err;
128 
129 	return 0;
130 err:
131 	close(fd);
132 	fd = -1;
133 	return ret;
134 }
135 
136 struct rte_dev_event {
137 	enum rte_dev_event_type type;	/**< device event type */
138 	int subsystem;			/**< subsystem id */
139 	char *devname;			/**< device name */
140 };
141 
142 static int
143 dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
144 {
145 	char action[EAL_UEV_MSG_ELEM_LEN];
146 	char subsystem[EAL_UEV_MSG_ELEM_LEN];
147 	char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
148 	int i = 0;
149 
150 	memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
151 	memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
152 	memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
153 
154 	while (i < length) {
155 		for (; i < length; i++) {
156 			if (*buf)
157 				break;
158 			buf++;
159 		}
160 		if (i >= length)
161 			break;
162 
163 		/**
164 		 * check device uevent from kernel side, no need to check
165 		 * uevent from udev.
166 		 */
167 		if (!strncmp(buf, "libudev", 7)) {
168 			buf += 7;
169 			i += 7;
170 			return -1;
171 		}
172 		if (!strncmp(buf, "ACTION=", 7)) {
173 			buf += 7;
174 			i += 7;
175 			strlcpy(action, buf, sizeof(action));
176 		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
177 			buf += 10;
178 			i += 10;
179 			strlcpy(subsystem, buf, sizeof(subsystem));
180 		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
181 			buf += 14;
182 			i += 14;
183 			strlcpy(pci_slot_name, buf, sizeof(subsystem));
184 			event->devname = strdup(pci_slot_name);
185 		}
186 		for (; i < length; i++) {
187 			if (*buf == '\0')
188 				break;
189 			buf++;
190 		}
191 	}
192 
193 	/* parse the subsystem layer */
194 	if (!strncmp(subsystem, "uio", 3))
195 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
196 	else if (!strncmp(subsystem, "pci", 3))
197 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
198 	else if (!strncmp(subsystem, "vfio", 4))
199 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
200 	else
201 		goto err;
202 
203 	/* parse the action type */
204 	if (!strncmp(action, "add", 3))
205 		event->type = RTE_DEV_EVENT_ADD;
206 	else if (!strncmp(action, "remove", 6))
207 		event->type = RTE_DEV_EVENT_REMOVE;
208 	else
209 		goto err;
210 	return 0;
211 err:
212 	free(event->devname);
213 	return -1;
214 }
215 
216 static void
217 dev_delayed_unregister(void *param)
218 {
219 	rte_intr_callback_unregister(intr_handle, dev_uev_handler, param);
220 	if (rte_intr_fd_get(intr_handle) >= 0) {
221 		close(rte_intr_fd_get(intr_handle));
222 		rte_intr_fd_set(intr_handle, -1);
223 	}
224 }
225 
226 static void
227 dev_uev_handler(__rte_unused void *param)
228 {
229 	struct rte_dev_event uevent;
230 	int ret;
231 	char buf[EAL_UEV_MSG_LEN + 1];
232 	struct rte_bus *bus;
233 	struct rte_device *dev;
234 	const char *busname = "";
235 
236 	memset(&uevent, 0, sizeof(struct rte_dev_event));
237 	memset(buf, 0, EAL_UEV_MSG_LEN + 1);
238 
239 	if (rte_intr_fd_get(intr_handle) < 0)
240 		return;
241 
242 	ret = recv(rte_intr_fd_get(intr_handle), buf, EAL_UEV_MSG_LEN,
243 		   MSG_DONTWAIT);
244 	if (ret < 0 && errno == EAGAIN)
245 		return;
246 	else if (ret <= 0) {
247 		/* connection is closed or broken, can not up again. */
248 		RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n");
249 		rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
250 		return;
251 	}
252 
253 	ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
254 	if (ret < 0) {
255 		RTE_LOG(DEBUG, EAL, "Ignoring uevent '%s'\n", buf);
256 		return;
257 	}
258 
259 	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
260 		uevent.devname, uevent.type, uevent.subsystem);
261 
262 	switch (uevent.subsystem) {
263 	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
264 	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
265 		busname = "pci";
266 		break;
267 	default:
268 		break;
269 	}
270 
271 	if (uevent.devname) {
272 		if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
273 			rte_spinlock_lock(&failure_handle_lock);
274 			bus = rte_bus_find_by_name(busname);
275 			if (bus == NULL) {
276 				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
277 					busname);
278 				goto failure_handle_err;
279 			}
280 
281 			dev = bus->find_device(NULL, cmp_dev_name,
282 					       uevent.devname);
283 			if (dev == NULL) {
284 				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
285 					"bus (%s)\n", uevent.devname, busname);
286 				goto failure_handle_err;
287 			}
288 
289 			ret = bus->hot_unplug_handler(dev);
290 			if (ret) {
291 				RTE_LOG(ERR, EAL, "Can not handle hot-unplug "
292 					"for device (%s)\n", dev->name);
293 			}
294 			rte_spinlock_unlock(&failure_handle_lock);
295 		}
296 		rte_dev_event_callback_process(uevent.devname, uevent.type);
297 		free(uevent.devname);
298 	}
299 
300 	return;
301 
302 failure_handle_err:
303 	rte_spinlock_unlock(&failure_handle_lock);
304 	free(uevent.devname);
305 }
306 
307 int
308 rte_dev_event_monitor_start(void)
309 {
310 	int ret = 0;
311 
312 	rte_rwlock_write_lock(&monitor_lock);
313 
314 	if (monitor_refcount) {
315 		monitor_refcount++;
316 		goto exit;
317 	}
318 
319 	intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
320 	if (intr_handle == NULL) {
321 		RTE_LOG(ERR, EAL, "Fail to allocate intr_handle\n");
322 		goto exit;
323 	}
324 
325 	ret = rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_DEV_EVENT);
326 	if (ret)
327 		goto exit;
328 
329 	ret = rte_intr_fd_set(intr_handle, -1);
330 	if (ret)
331 		goto exit;
332 
333 	ret = dev_uev_socket_fd_create();
334 	if (ret) {
335 		RTE_LOG(ERR, EAL, "error create device event fd.\n");
336 		goto exit;
337 	}
338 
339 	ret = rte_intr_callback_register(intr_handle, dev_uev_handler, NULL);
340 
341 	if (ret) {
342 		close(rte_intr_fd_get(intr_handle));
343 		goto exit;
344 	}
345 
346 	monitor_refcount++;
347 
348 exit:
349 	if (ret) {
350 		rte_intr_instance_free(intr_handle);
351 		intr_handle = NULL;
352 	}
353 	rte_rwlock_write_unlock(&monitor_lock);
354 	return ret;
355 }
356 
357 int
358 rte_dev_event_monitor_stop(void)
359 {
360 	int ret = 0;
361 
362 	rte_rwlock_write_lock(&monitor_lock);
363 
364 	if (!monitor_refcount) {
365 		RTE_LOG(ERR, EAL, "device event monitor already stopped\n");
366 		goto exit;
367 	}
368 
369 	if (monitor_refcount > 1) {
370 		monitor_refcount--;
371 		goto exit;
372 	}
373 
374 	ret = rte_intr_callback_unregister(intr_handle, dev_uev_handler,
375 					   (void *)-1);
376 	if (ret < 0) {
377 		RTE_LOG(ERR, EAL, "fail to unregister uevent callback.\n");
378 		goto exit;
379 	}
380 
381 	close(rte_intr_fd_get(intr_handle));
382 	rte_intr_instance_free(intr_handle);
383 	intr_handle = NULL;
384 	ret = 0;
385 
386 	monitor_refcount--;
387 
388 exit:
389 	rte_rwlock_write_unlock(&monitor_lock);
390 
391 	return ret;
392 }
393 
394 int
395 dev_sigbus_handler_register(void)
396 {
397 	sigset_t mask;
398 	struct sigaction action;
399 
400 	rte_errno = 0;
401 
402 	if (sigbus_need_recover)
403 		return 0;
404 
405 	sigemptyset(&mask);
406 	sigaddset(&mask, SIGBUS);
407 	action.sa_flags = SA_SIGINFO;
408 	action.sa_mask = mask;
409 	action.sa_sigaction = sigbus_handler;
410 	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
411 
412 	return rte_errno;
413 }
414 
415 int
416 dev_sigbus_handler_unregister(void)
417 {
418 	rte_errno = 0;
419 
420 	sigbus_action_recover();
421 
422 	return rte_errno;
423 }
424 
425 int
426 rte_dev_hotplug_handle_enable(void)
427 {
428 	int ret = 0;
429 
430 	ret = dev_sigbus_handler_register();
431 	if (ret < 0)
432 		RTE_LOG(ERR, EAL,
433 			"fail to register sigbus handler for devices.\n");
434 
435 	hotplug_handle = true;
436 
437 	return ret;
438 }
439 
440 int
441 rte_dev_hotplug_handle_disable(void)
442 {
443 	int ret = 0;
444 
445 	ret = dev_sigbus_handler_unregister();
446 	if (ret < 0)
447 		RTE_LOG(ERR, EAL,
448 			"fail to unregister sigbus handler for devices.\n");
449 
450 	hotplug_handle = false;
451 
452 	return ret;
453 }
454