xref: /dpdk/lib/eal/linux/eal_dev.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2018 Intel Corporation
3  */
4 
5 #include <string.h>
6 #include <unistd.h>
7 #include <fcntl.h>
8 #include <signal.h>
9 #include <sys/socket.h>
10 #include <linux/netlink.h>
11 
12 #include <rte_string_fns.h>
13 #include <rte_log.h>
14 #include <rte_compat.h>
15 #include <rte_dev.h>
16 #include <rte_malloc.h>
17 #include <rte_interrupts.h>
18 #include <rte_alarm.h>
19 #include <rte_bus.h>
20 #include <rte_eal.h>
21 #include <rte_spinlock.h>
22 #include <rte_errno.h>
23 
24 #include "eal_private.h"
25 
/*
 * Interrupt handle wrapping the netlink uevent socket. The fd is -1 whenever
 * no socket is open.
 */
static struct rte_intr_handle intr_handle = {
	.type = RTE_INTR_HANDLE_DEV_EVENT,
	.fd = -1,
};
/* Taken as a write lock by monitor start/stop around monitor_refcount. */
static rte_rwlock_t monitor_lock = RTE_RWLOCK_INITIALIZER;
/* Number of outstanding rte_dev_event_monitor_start() calls. */
static uint32_t monitor_refcount;
/*
 * Set by rte_dev_hotplug_handle_enable()/disable(); when true, remove events
 * are forwarded to the bus hot-unplug handler.
 */
static bool hotplug_handle;

/* Size of the buffer used to receive one uevent message. */
#define EAL_UEV_MSG_LEN 4096
/* Size of the buffers holding a single value extracted from a uevent. */
#define EAL_UEV_MSG_ELEM_LEN 128
36 
/*
 * Spinlock for device hot-unplug failure handling. Any code that accesses a
 * bus or a device while handling a failure, such as handling SIGBUS on a bus
 * or handling a memory failure for a device, must hold this lock. It protects
 * the bus and the device against race conditions.
 */
static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;

/* SIGBUS disposition that was in place before sigbus_handler was installed. */
static struct sigaction sigbus_action_old;

/* Non-zero while sigbus_action_old holds a disposition still to be restored. */
static int sigbus_need_recover;

/* Forward declaration: dev_delayed_unregister refers to it before its definition. */
static void dev_uev_handler(__rte_unused void *param);
50 
/* Identifies the system layer that reported a uevent. */
enum eal_dev_event_subsystem {
	EAL_DEV_EVENT_SUBSYSTEM_PCI, /* PCI bus device event */
	EAL_DEV_EVENT_SUBSYSTEM_UIO, /* UIO driver device event */
	EAL_DEV_EVENT_SUBSYSTEM_VFIO, /* VFIO driver device event */
	/* Last enumerator: its value equals the number of subsystems above. */
	EAL_DEV_EVENT_SUBSYSTEM_MAX
};
58 
59 static void
60 sigbus_action_recover(void)
61 {
62 	if (sigbus_need_recover) {
63 		sigaction(SIGBUS, &sigbus_action_old, NULL);
64 		sigbus_need_recover = 0;
65 	}
66 }
67 
68 static void sigbus_handler(int signum, siginfo_t *info,
69 				void *ctx __rte_unused)
70 {
71 	int ret;
72 
73 	RTE_LOG(DEBUG, EAL, "Thread catch SIGBUS, fault address:%p\n",
74 		info->si_addr);
75 
76 	rte_spinlock_lock(&failure_handle_lock);
77 	ret = rte_bus_sigbus_handler(info->si_addr);
78 	rte_spinlock_unlock(&failure_handle_lock);
79 	if (ret == -1) {
80 		rte_exit(EXIT_FAILURE,
81 			 "Failed to handle SIGBUS for hot-unplug, "
82 			 "(rte_errno: %s)!", strerror(rte_errno));
83 	} else if (ret == 1) {
84 		if (sigbus_action_old.sa_flags == SA_SIGINFO
85 		    && sigbus_action_old.sa_sigaction) {
86 			(*(sigbus_action_old.sa_sigaction))(signum,
87 							    info, ctx);
88 		} else if (sigbus_action_old.sa_flags != SA_SIGINFO
89 			   && sigbus_action_old.sa_handler) {
90 			(*(sigbus_action_old.sa_handler))(signum);
91 		} else {
92 			rte_exit(EXIT_FAILURE,
93 				 "Failed to handle generic SIGBUS!");
94 		}
95 	}
96 
97 	RTE_LOG(DEBUG, EAL, "Success to handle SIGBUS for hot-unplug!\n");
98 }
99 
100 static int cmp_dev_name(const struct rte_device *dev,
101 	const void *_name)
102 {
103 	const char *name = _name;
104 
105 	return strcmp(dev->name, name);
106 }
107 
108 static int
109 dev_uev_socket_fd_create(void)
110 {
111 	struct sockaddr_nl addr;
112 	int ret;
113 
114 	intr_handle.fd = socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC |
115 			SOCK_NONBLOCK,
116 			NETLINK_KOBJECT_UEVENT);
117 	if (intr_handle.fd < 0) {
118 		RTE_LOG(ERR, EAL, "create uevent fd failed.\n");
119 		return -1;
120 	}
121 
122 	memset(&addr, 0, sizeof(addr));
123 	addr.nl_family = AF_NETLINK;
124 	addr.nl_pid = 0;
125 	addr.nl_groups = 0xffffffff;
126 
127 	ret = bind(intr_handle.fd, (struct sockaddr *) &addr, sizeof(addr));
128 	if (ret < 0) {
129 		RTE_LOG(ERR, EAL, "Failed to bind uevent socket.\n");
130 		goto err;
131 	}
132 
133 	return 0;
134 err:
135 	close(intr_handle.fd);
136 	intr_handle.fd = -1;
137 	return ret;
138 }
139 
/*
 * Result of parsing one uevent message. devname is heap-allocated by
 * dev_uev_parse() with strdup() and released by dev_uev_handler() with
 * free(); it stays NULL when the message carries no PCI_SLOT_NAME.
 */
struct rte_dev_event {
	enum rte_dev_event_type type;	/**< device event type */
	int subsystem;			/**< subsystem id (enum eal_dev_event_subsystem value) */
	char *devname;			/**< device name */
};
145 
146 static int
147 dev_uev_parse(const char *buf, struct rte_dev_event *event, int length)
148 {
149 	char action[EAL_UEV_MSG_ELEM_LEN];
150 	char subsystem[EAL_UEV_MSG_ELEM_LEN];
151 	char pci_slot_name[EAL_UEV_MSG_ELEM_LEN];
152 	int i = 0;
153 
154 	memset(action, 0, EAL_UEV_MSG_ELEM_LEN);
155 	memset(subsystem, 0, EAL_UEV_MSG_ELEM_LEN);
156 	memset(pci_slot_name, 0, EAL_UEV_MSG_ELEM_LEN);
157 
158 	while (i < length) {
159 		for (; i < length; i++) {
160 			if (*buf)
161 				break;
162 			buf++;
163 		}
164 		/**
165 		 * check device uevent from kernel side, no need to check
166 		 * uevent from udev.
167 		 */
168 		if (!strncmp(buf, "libudev", 7)) {
169 			buf += 7;
170 			i += 7;
171 			return -1;
172 		}
173 		if (!strncmp(buf, "ACTION=", 7)) {
174 			buf += 7;
175 			i += 7;
176 			strlcpy(action, buf, sizeof(action));
177 		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
178 			buf += 10;
179 			i += 10;
180 			strlcpy(subsystem, buf, sizeof(subsystem));
181 		} else if (!strncmp(buf, "PCI_SLOT_NAME=", 14)) {
182 			buf += 14;
183 			i += 14;
184 			strlcpy(pci_slot_name, buf, sizeof(subsystem));
185 			event->devname = strdup(pci_slot_name);
186 		}
187 		for (; i < length; i++) {
188 			if (*buf == '\0')
189 				break;
190 			buf++;
191 		}
192 	}
193 
194 	/* parse the subsystem layer */
195 	if (!strncmp(subsystem, "uio", 3))
196 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_UIO;
197 	else if (!strncmp(subsystem, "pci", 3))
198 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_PCI;
199 	else if (!strncmp(subsystem, "vfio", 4))
200 		event->subsystem = EAL_DEV_EVENT_SUBSYSTEM_VFIO;
201 	else
202 		goto err;
203 
204 	/* parse the action type */
205 	if (!strncmp(action, "add", 3))
206 		event->type = RTE_DEV_EVENT_ADD;
207 	else if (!strncmp(action, "remove", 6))
208 		event->type = RTE_DEV_EVENT_REMOVE;
209 	else
210 		goto err;
211 	return 0;
212 err:
213 	free(event->devname);
214 	return -1;
215 }
216 
217 static void
218 dev_delayed_unregister(void *param)
219 {
220 	rte_intr_callback_unregister(&intr_handle, dev_uev_handler, param);
221 	close(intr_handle.fd);
222 	intr_handle.fd = -1;
223 }
224 
225 static void
226 dev_uev_handler(__rte_unused void *param)
227 {
228 	struct rte_dev_event uevent;
229 	int ret;
230 	char buf[EAL_UEV_MSG_LEN];
231 	struct rte_bus *bus;
232 	struct rte_device *dev;
233 	const char *busname = "";
234 
235 	memset(&uevent, 0, sizeof(struct rte_dev_event));
236 	memset(buf, 0, EAL_UEV_MSG_LEN);
237 
238 	ret = recv(intr_handle.fd, buf, EAL_UEV_MSG_LEN, MSG_DONTWAIT);
239 	if (ret < 0 && errno == EAGAIN)
240 		return;
241 	else if (ret <= 0) {
242 		/* connection is closed or broken, can not up again. */
243 		RTE_LOG(ERR, EAL, "uevent socket connection is broken.\n");
244 		rte_eal_alarm_set(1, dev_delayed_unregister, NULL);
245 		return;
246 	}
247 
248 	ret = dev_uev_parse(buf, &uevent, EAL_UEV_MSG_LEN);
249 	if (ret < 0) {
250 		RTE_LOG(DEBUG, EAL, "Ignoring uevent '%s'\n", buf);
251 		return;
252 	}
253 
254 	RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
255 		uevent.devname, uevent.type, uevent.subsystem);
256 
257 	switch (uevent.subsystem) {
258 	case EAL_DEV_EVENT_SUBSYSTEM_PCI:
259 	case EAL_DEV_EVENT_SUBSYSTEM_UIO:
260 		busname = "pci";
261 		break;
262 	default:
263 		break;
264 	}
265 
266 	if (uevent.devname) {
267 		if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
268 			rte_spinlock_lock(&failure_handle_lock);
269 			bus = rte_bus_find_by_name(busname);
270 			if (bus == NULL) {
271 				RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
272 					busname);
273 				goto failure_handle_err;
274 			}
275 
276 			dev = bus->find_device(NULL, cmp_dev_name,
277 					       uevent.devname);
278 			if (dev == NULL) {
279 				RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
280 					"bus (%s)\n", uevent.devname, busname);
281 				goto failure_handle_err;
282 			}
283 
284 			ret = bus->hot_unplug_handler(dev);
285 			if (ret) {
286 				RTE_LOG(ERR, EAL, "Can not handle hot-unplug "
287 					"for device (%s)\n", dev->name);
288 			}
289 			rte_spinlock_unlock(&failure_handle_lock);
290 		}
291 		rte_dev_event_callback_process(uevent.devname, uevent.type);
292 		free(uevent.devname);
293 	}
294 
295 	return;
296 
297 failure_handle_err:
298 	rte_spinlock_unlock(&failure_handle_lock);
299 	free(uevent.devname);
300 }
301 
302 int
303 rte_dev_event_monitor_start(void)
304 {
305 	int ret = 0;
306 
307 	rte_rwlock_write_lock(&monitor_lock);
308 
309 	if (monitor_refcount) {
310 		monitor_refcount++;
311 		goto exit;
312 	}
313 
314 	ret = dev_uev_socket_fd_create();
315 	if (ret) {
316 		RTE_LOG(ERR, EAL, "error create device event fd.\n");
317 		goto exit;
318 	}
319 
320 	ret = rte_intr_callback_register(&intr_handle, dev_uev_handler, NULL);
321 
322 	if (ret) {
323 		RTE_LOG(ERR, EAL, "fail to register uevent callback.\n");
324 		close(intr_handle.fd);
325 		intr_handle.fd = -1;
326 		goto exit;
327 	}
328 
329 	monitor_refcount++;
330 
331 exit:
332 	rte_rwlock_write_unlock(&monitor_lock);
333 	return ret;
334 }
335 
336 int
337 rte_dev_event_monitor_stop(void)
338 {
339 	int ret = 0;
340 
341 	rte_rwlock_write_lock(&monitor_lock);
342 
343 	if (!monitor_refcount) {
344 		RTE_LOG(ERR, EAL, "device event monitor already stopped\n");
345 		goto exit;
346 	}
347 
348 	if (monitor_refcount > 1) {
349 		monitor_refcount--;
350 		goto exit;
351 	}
352 
353 	ret = rte_intr_callback_unregister(&intr_handle, dev_uev_handler,
354 					   (void *)-1);
355 	if (ret < 0) {
356 		RTE_LOG(ERR, EAL, "fail to unregister uevent callback.\n");
357 		goto exit;
358 	}
359 
360 	close(intr_handle.fd);
361 	intr_handle.fd = -1;
362 
363 	monitor_refcount--;
364 
365 exit:
366 	rte_rwlock_write_unlock(&monitor_lock);
367 
368 	return ret;
369 }
370 
371 int
372 dev_sigbus_handler_register(void)
373 {
374 	sigset_t mask;
375 	struct sigaction action;
376 
377 	rte_errno = 0;
378 
379 	if (sigbus_need_recover)
380 		return 0;
381 
382 	sigemptyset(&mask);
383 	sigaddset(&mask, SIGBUS);
384 	action.sa_flags = SA_SIGINFO;
385 	action.sa_mask = mask;
386 	action.sa_sigaction = sigbus_handler;
387 	sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
388 
389 	return rte_errno;
390 }
391 
392 int
393 dev_sigbus_handler_unregister(void)
394 {
395 	rte_errno = 0;
396 
397 	sigbus_action_recover();
398 
399 	return rte_errno;
400 }
401 
402 int
403 rte_dev_hotplug_handle_enable(void)
404 {
405 	int ret = 0;
406 
407 	ret = dev_sigbus_handler_register();
408 	if (ret < 0)
409 		RTE_LOG(ERR, EAL,
410 			"fail to register sigbus handler for devices.\n");
411 
412 	hotplug_handle = true;
413 
414 	return ret;
415 }
416 
417 int
418 rte_dev_hotplug_handle_disable(void)
419 {
420 	int ret = 0;
421 
422 	ret = dev_sigbus_handler_unregister();
423 	if (ret < 0)
424 		RTE_LOG(ERR, EAL,
425 			"fail to unregister sigbus handler for devices.\n");
426 
427 	hotplug_handle = false;
428 
429 	return ret;
430 }
431