xref: /dpdk/lib/eal/linux/eal_dev.c (revision fcfb19cda4f6439f260cdeabb6c44d3c6f0d5fc4)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <stdlib.h>
6 #include <string.h>
7 #include <unistd.h>
8 #include <signal.h>
9 #include <sys/socket.h>
10 #include <linux/netlink.h>
11 
12 #include <rte_string_fns.h>
13 #include <rte_log.h>
14 #include <rte_dev.h>
15 #include <rte_interrupts.h>
16 #include <rte_alarm.h>
17 #include <bus_driver.h>
18 #include <rte_spinlock.h>
19 #include <rte_errno.h>
20 
21 #include "eal_private.h"
22 
/*
 * Interrupt handle of the uevent monitor. It is allocated in
 * rte_dev_event_monitor_start() and carries the netlink socket fd that
 * dev_uev_socket_fd_create() stores in it.
 */
static struct rte_intr_handle *intr_handle;
/* Taken for writing while the monitor is being started or stopped. */
static rte_rwlock_t monitor_lock = RTE_RWLOCK_INITIALIZER;
/* Number of monitor starts that have not been balanced by a stop yet. */
static uint32_t monitor_refcount;
/* Set by rte_dev_hotplug_handle_enable(), cleared by the disable call. */
static bool hotplug_handle;

/* Number of bytes requested from the uevent socket per recv() call. */
#define EAL_UEV_MSG_LEN 4096
/* Size of each field buffer (action, subsystem, slot name) while parsing. */
#define EAL_UEV_MSG_ELEM_LEN 128

/*
 * Spinlock for device hot-unplug failure handling. Any code that accesses a
 * bus or a device while handling a failure, such as handling SIGBUS on a bus
 * or handling a memory failure for a device, must take this lock. It protects
 * the bus and the device against race conditions.
 */
static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;

/* SIGBUS disposition that was in place before sigbus_handler was installed. */
static struct sigaction sigbus_action_old;

/* Non-zero while sigbus_action_old still has to be restored. */
static int sigbus_need_recover;

/* Forward declaration: dev_delayed_unregister() refers to the handler. */
static void dev_uev_handler(__rte_unused void *param);
44 
/*
 * Identify the system layer which reports this event. dev_uev_parse() stores
 * one of these values in the subsystem field of struct rte_dev_event.
 */
enum eal_dev_event_subsystem {
	EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
	EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
	EAL_DEV_EVENT_SUBSYSTEM_VFIO, /* VFIO driver device event */
	EAL_DEV_EVENT_SUBSYSTEM_MAX /* last enumerator, not reported by the parser */
};
52 
53 static void
sigbus_action_recover(void)54 sigbus_action_recover(void)
55 {
56 	if (sigbus_need_recover) {
57 		sigaction(SIGBUS, &sigbus_action_old, NULL);
58 		sigbus_need_recover = 0;
59 	}
60 }
61 
/*
 * SIGBUS handler used while hot-unplug handling is enabled.
 *
 * The faulting address is handed to rte_bus_sigbus_handler() under
 * failure_handle_lock, then the result is acted upon:
 *   -1  the failure could not be handled: the application exits;
 *    1  generic SIGBUS: it is forwarded to the handler that was installed
 *       before this one, or the application exits when there is none;
 *   any other value: execution simply resumes.
 *
 * @param signum  signal number, passed through to the previous handler.
 * @param info    signal information; si_addr is the faulting address.
 * @param ctx     signal context, passed through to the previous handler.
 */
static void sigbus_handler(int signum, siginfo_t *info,
				void *ctx __rte_unused)
{
	int ret;

	EAL_LOG(DEBUG, "Thread catch SIGBUS, fault address:%p",
		info->si_addr);

	rte_spinlock_lock(&failure_handle_lock);
	ret = rte_bus_sigbus_handler(info->si_addr);
	rte_spinlock_unlock(&failure_handle_lock);
	if (ret == -1) {
		rte_exit(EXIT_FAILURE,
			 "Failed to handle SIGBUS for hot-unplug, "
			 "(rte_errno: %s)!", strerror(rte_errno));
	} else if (ret == 1) {
		/*
		 * sa_flags is a bit mask, so SA_SIGINFO has to be tested as a
		 * bit: a previous handler registered with additional flags
		 * (e.g. SA_RESTART) must still be called through the right
		 * entry point.
		 */
		int old_is_siginfo =
			(sigbus_action_old.sa_flags & SA_SIGINFO) != 0;

		if (old_is_siginfo && sigbus_action_old.sa_sigaction) {
			(*(sigbus_action_old.sa_sigaction))(signum,
							    info, ctx);
		} else if (!old_is_siginfo
			   && sigbus_action_old.sa_handler != SIG_DFL
			   && sigbus_action_old.sa_handler != SIG_IGN) {
			/* SIG_DFL and SIG_IGN are markers, not callable. */
			(*(sigbus_action_old.sa_handler))(signum);
		} else {
			rte_exit(EXIT_FAILURE,
				 "Failed to handle generic SIGBUS!");
		}
	}

	EAL_LOG(DEBUG, "Success to handle SIGBUS for hot-unplug!");
}
93 
cmp_dev_name(const struct rte_device * dev,const void * _name)94 static int cmp_dev_name(const struct rte_device *dev,
95 	const void *_name)
96 {
97 	const char *name = _name;
98 
99 	return strcmp(dev->name, name);
100 }
101 
102 static int
dev_uev_socket_fd_create(void)103 dev_uev_socket_fd_create(void)
104 {
105 	struct sockaddr_nl addr;
106 	int ret, fd;
107 
108 	fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
109 		    NETLINK_KOBJECT_UEVENT);
110 	if (fd < 0) {
111 		EAL_LOG(ERR, "create uevent fd failed.");
112 		return -1;
113 	}
114 
115 	memset(&addr, 0, sizeof(addr));
116 	addr.nl_family = AF_NETLINK;
117 	addr.nl_pid = 0;
118 	addr.nl_groups = 0xffffffff;
119 
120 	ret = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
121 	if (ret < 0) {
122 		EAL_LOG(ERR, "Failed to bind uevent socket.");
123 		goto err;
124 	}
125 
126 	if (rte_intr_fd_set(intr_handle, fd))
127 		goto err;
128 
129 	return 0;
130 err:
131 	close(fd);
132 	fd = -1;
133 	return ret;
134 }
135 
/*
 * One parsed kernel uevent, filled in by dev_uev_parse() and consumed by
 * dev_uev_handler().
 */
struct rte_dev_event {
	enum rte_dev_event_type type;	/**< device event type */
	int subsystem;			/**< subsystem id */
	char *devname;			/**< device name */
};
141 
142 static int
dev_uev_parse(const char * buf,struct rte_dev_event * event,int length)143 dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
144 {
145 	char action[EAL_UEV_MSG_ELEM_LEN];
146 	char subsystem[EAL_UEV_MSG_ELEM_LEN];
147 	char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
148 	int i = 0;
149 
150 	memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
151 	memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
152 	memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
153 
154 	while (i < length) {
155 		for (; i < length; i++) {
156 			if (*buf)
157 				break;
158 			buf++;
159 		}
160 		if (i >= length)
161 			break;
162 
163 		/**
164 		 * check device uevent from kernel side, no need to check
165 		 * uevent from udev.
166 		 */
167 		if (!strncmp(buf, "libudev", 7)) {
168 			buf += 7;
169 			i += 7;
170 			return -1;
171 		}
172 		if (!strncmp(buf, "ACTION=", 7)) {
173 			buf += 7;
174 			i += 7;
175 			strlcpy(action, buf, sizeof(action));
176 		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
177 			buf += 10;
178 			i += 10;
179 			strlcpy(subsystem, buf, sizeof(subsystem));
180 		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
181 			buf += 14;
182 			i += 14;
183 			strlcpy(pci_slot_name, buf, sizeof(subsystem));
184 			event->devname = strdup(pci_slot_name);
185 			if (event->devname == NULL)
186 				return -1;
187 		}
188 		for (; i < length; i++) {
189 			if (*buf == '\0')
190 				break;
191 			buf++;
192 		}
193 	}
194 
195 	/* parse the subsystem layer */
196 	if (!strncmp(subsystem, "uio", 3))
197 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
198 	else if (!strncmp(subsystem, "pci", 3))
199 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
200 	else if (!strncmp(subsystem, "vfio", 4))
201 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
202 	else
203 		goto err;
204 
205 	/* parse the action type */
206 	if (!strncmp(action, "add", 3))
207 		event->type = RTE_DEV_EVENT_ADD;
208 	else if (!strncmp(action, "remove", 6))
209 		event->type = RTE_DEV_EVENT_REMOVE;
210 	else
211 		goto err;
212 	return 0;
213 err:
214 	free(event->devname);
215 	return -1;
216 }
217 
218 static void
dev_delayed_unregister(void * param)219 dev_delayed_unregister(void *param)
220 {
221 	rte_intr_callback_unregister(intr_handle, dev_uev_handler, param);
222 	if (rte_intr_fd_get(intr_handle) >= 0) {
223 		close(rte_intr_fd_get(intr_handle));
224 		rte_intr_fd_set(intr_handle, -1);
225 	}
226 }
227 
228 static void
dev_uev_handler(__rte_unused void * param)229 dev_uev_handler(__rte_unused void *param)
230 {
231 	struct rte_dev_event uevent;
232 	int ret;
233 	char buf[EAL_UEV_MSG_LEN + 1];
234 	struct rte_bus *bus;
235 	struct rte_device *dev;
236 	const char *busname = "";
237 
238 	memset(&uevent, 0, sizeof(struct rte_dev_event));
239 	memset(buf, 0, EAL_UEV_MSG_LEN + 1);
240 
241 	if (rte_intr_fd_get(intr_handle) < 0)
242 		return;
243 
244 	ret = recv(rte_intr_fd_get(intr_handle), buf, EAL_UEV_MSG_LEN,
245 		   MSG_DONTWAIT);
246 	if (ret < 0 && errno == EAGAIN)
247 		return;
248 	else if (ret <= 0) {
249 		/* connection is closed or broken, can not up again. */
250 		EAL_LOG(ERR, "uevent socket connection is broken.");
251 		rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
252 		return;
253 	}
254 
255 	ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
256 	if (ret < 0) {
257 		EAL_LOG(DEBUG, "Ignoring uevent '%s'", buf);
258 		return;
259 	}
260 
261 	EAL_LOG(DEBUG, "receive uevent(name:%s, type:%d, subsystem:%d)",
262 		uevent.devname, uevent.type, uevent.subsystem);
263 
264 	switch (uevent.subsystem) {
265 	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
266 	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
267 		busname = "pci";
268 		break;
269 	default:
270 		break;
271 	}
272 
273 	if (uevent.devname) {
274 		if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
275 			rte_spinlock_lock(&failure_handle_lock);
276 			bus = rte_bus_find_by_name(busname);
277 			if (bus == NULL) {
278 				EAL_LOG(ERR, "Cannot find bus (%s)",
279 					busname);
280 				goto failure_handle_err;
281 			}
282 
283 			dev = bus->find_device(NULL, cmp_dev_name,
284 					       uevent.devname);
285 			if (dev == NULL) {
286 				EAL_LOG(ERR, "Cannot find device (%s) on "
287 					"bus (%s)", uevent.devname, busname);
288 				goto failure_handle_err;
289 			}
290 
291 			ret = bus->hot_unplug_handler(dev);
292 			if (ret) {
293 				EAL_LOG(ERR, "Can not handle hot-unplug "
294 					"for device (%s)", dev->name);
295 			}
296 			rte_spinlock_unlock(&failure_handle_lock);
297 		}
298 		rte_dev_event_callback_process(uevent.devname, uevent.type);
299 		free(uevent.devname);
300 	}
301 
302 	return;
303 
304 failure_handle_err:
305 	rte_spinlock_unlock(&failure_handle_lock);
306 	free(uevent.devname);
307 }
308 
309 int
rte_dev_event_monitor_start(void)310 rte_dev_event_monitor_start(void)
311 {
312 	int ret = 0;
313 
314 	rte_rwlock_write_lock(&monitor_lock);
315 
316 	if (monitor_refcount) {
317 		monitor_refcount++;
318 		goto exit;
319 	}
320 
321 	intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
322 	if (intr_handle == NULL) {
323 		EAL_LOG(ERR, "Fail to allocate intr_handle");
324 		goto exit;
325 	}
326 
327 	ret = rte_intr_type_set(intr_handle, RTE_INTR_HANDLE_DEV_EVENT);
328 	if (ret)
329 		goto exit;
330 
331 	ret = rte_intr_fd_set(intr_handle, -1);
332 	if (ret)
333 		goto exit;
334 
335 	ret = dev_uev_socket_fd_create();
336 	if (ret) {
337 		EAL_LOG(ERR, "error create device event fd.");
338 		goto exit;
339 	}
340 
341 	ret = rte_intr_callback_register(intr_handle, dev_uev_handler, NULL);
342 
343 	if (ret) {
344 		close(rte_intr_fd_get(intr_handle));
345 		goto exit;
346 	}
347 
348 	monitor_refcount++;
349 
350 exit:
351 	if (ret) {
352 		rte_intr_instance_free(intr_handle);
353 		intr_handle = NULL;
354 	}
355 	rte_rwlock_write_unlock(&monitor_lock);
356 	return ret;
357 }
358 
359 int
rte_dev_event_monitor_stop(void)360 rte_dev_event_monitor_stop(void)
361 {
362 	int ret = 0;
363 
364 	rte_rwlock_write_lock(&monitor_lock);
365 
366 	if (!monitor_refcount) {
367 		EAL_LOG(ERR, "device event monitor already stopped");
368 		goto exit;
369 	}
370 
371 	if (monitor_refcount > 1) {
372 		monitor_refcount--;
373 		goto exit;
374 	}
375 
376 	ret = rte_intr_callback_unregister(intr_handle, dev_uev_handler,
377 					   (void *)-1);
378 	if (ret < 0) {
379 		EAL_LOG(ERR, "fail to unregister uevent callback.");
380 		goto exit;
381 	}
382 
383 	close(rte_intr_fd_get(intr_handle));
384 	rte_intr_instance_free(intr_handle);
385 	intr_handle = NULL;
386 	ret = 0;
387 
388 	monitor_refcount--;
389 
390 exit:
391 	rte_rwlock_write_unlock(&monitor_lock);
392 
393 	return ret;
394 }
395 
396 static int
dev_sigbus_handler_register(void)397 dev_sigbus_handler_register(void)
398 {
399 	sigset_t mask;
400 	struct sigaction action;
401 
402 	rte_errno = 0;
403 
404 	if (sigbus_need_recover)
405 		return 0;
406 
407 	sigemptyset(&mask);
408 	sigaddset(&mask, SIGBUS);
409 	action.sa_flags = SA_SIGINFO;
410 	action.sa_mask = mask;
411 	action.sa_sigaction = sigbus_handler;
412 	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
413 
414 	return rte_errno;
415 }
416 
417 static int
dev_sigbus_handler_unregister(void)418 dev_sigbus_handler_unregister(void)
419 {
420 	rte_errno = 0;
421 
422 	sigbus_action_recover();
423 
424 	return rte_errno;
425 }
426 
427 int
rte_dev_hotplug_handle_enable(void)428 rte_dev_hotplug_handle_enable(void)
429 {
430 	int ret = 0;
431 
432 	ret = dev_sigbus_handler_register();
433 	if (ret < 0)
434 		EAL_LOG(ERR,
435 			"fail to register sigbus handler for devices.");
436 
437 	hotplug_handle = true;
438 
439 	return ret;
440 }
441 
442 int
rte_dev_hotplug_handle_disable(void)443 rte_dev_hotplug_handle_disable(void)
444 {
445 	int ret = 0;
446 
447 	ret = dev_sigbus_handler_unregister();
448 	if (ret < 0)
449 		EAL_LOG(ERR,
450 			"fail to unregister sigbus handler for devices.");
451 
452 	hotplug_handle = false;
453 
454 	return ret;
455 }
456