xref: /spdk/lib/util/fd_group.c (revision 00715c7c87f9558be1aa4aaef327a611aa9ff5b6)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2020 Intel Corporation. All rights reserved.
3  *   All rights reserved.
4  */
5 
6 #include "spdk_internal/usdt.h"
7 
8 #include "spdk/env.h"
9 #include "spdk/log.h"
10 #include "spdk/queue.h"
11 
12 #include "spdk/fd_group.h"
13 
14 #ifdef __linux__
15 #include <sys/epoll.h>
16 #endif
17 
/* Maximum number of characters kept for an event handler name,
 * excluding the terminating NUL byte.
 */
#define SPDK_MAX_EVENT_NAME_LEN 256
19 
/* Lifecycle of an event_handler with respect to the fd_group wait loop. */
enum event_handler_state {
	/* Registered in an fd_group and idle: no wait loop is currently
	 * dispatching this handler.
	 */
	EVENT_HANDLER_STATE_WAITING,

	/* Picked up by a wait loop that is still in progress. */
	EVENT_HANDLER_STATE_RUNNING,

	/* Removed while a wait loop was dispatching it; the wait loop is
	 * responsible for releasing it.
	 */
	EVENT_HANDLER_STATE_REMOVED,
};
32 
33 /* Taking "ehdlr" as short name for file descriptor handler of the interrupt event. */
34 struct event_handler {
35 	TAILQ_ENTRY(event_handler)	next;
36 	enum event_handler_state	state;
37 
38 	spdk_fd_fn			fn;
39 	void				*fn_arg;
40 	/* file descriptor of the interrupt event */
41 	int				fd;
42 	uint32_t			events;
43 	char				name[SPDK_MAX_EVENT_NAME_LEN + 1];
44 };
45 
46 struct spdk_fd_group {
47 	int epfd;
48 
49 	/* Number of fds registered in this group. The epoll file descriptor of this fd group
50 	 * i.e. epfd waits for interrupt event on all the fds from its interrupt sources list, as
51 	 * well as from all its children fd group interrupt sources list.
52 	 */
53 	uint32_t num_fds;
54 
55 	struct spdk_fd_group *parent;
56 
57 	/* interrupt sources list */
58 	TAILQ_HEAD(, event_handler) event_handlers;
59 };
60 
61 int
62 spdk_fd_group_get_fd(struct spdk_fd_group *fgrp)
63 {
64 	return fgrp->epfd;
65 }
66 
67 #ifdef __linux__
68 
/* Per-thread pointer to the epoll event whose handler is currently being
 * invoked by spdk_fd_group_wait(); NULL whenever no handler is executing.
 */
static __thread struct epoll_event *g_event = NULL;

/*
 * Copy the epoll event that triggered the currently executing handler.
 *
 * \param event Output location for the copied event.
 *
 * \return 0 on success, -EINVAL if event is NULL or if no handler is being
 * dispatched on the calling thread.
 */
int
spdk_fd_group_get_epoll_event(struct epoll_event *event)
{
	if (event == NULL || g_event == NULL) {
		return -EINVAL;
	}

	*event = *g_event;
	return 0;
}
80 
81 static int
82 _fd_group_del_all(int epfd, struct spdk_fd_group *grp)
83 {
84 	struct event_handler *ehdlr = NULL;
85 	struct epoll_event epevent = {0};
86 	int rc;
87 	int ret = 0;
88 
89 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
90 		rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
91 		if (rc < 0) {
92 			if (errno == ENOENT) {
93 				/* This is treated as success. It happens if there are multiple
94 				 * attempts to remove fds from the group.
95 				 */
96 				continue;
97 			}
98 
99 			ret = -errno;
100 			SPDK_ERRLOG("Failed to remove fd: %d from group: %s\n",
101 				    ehdlr->fd, strerror(errno));
102 			goto recover;
103 		}
104 		ret++;
105 	}
106 
107 	return ret;
108 
109 recover:
110 	/* We failed to remove everything. Let's try to put everything back into
111 	 * the original group. */
112 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
113 		epevent.events = ehdlr->events;
114 		epevent.data.ptr = ehdlr;
115 		rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent);
116 		if (rc < 0) {
117 			if (errno == EEXIST) {
118 				/* This is fine. Keep going. */
119 				continue;
120 			}
121 
122 			/* Continue on even though we've failed. But indicate
123 			 * this is a fatal error. */
124 			SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno));
125 			ret = -ENOTRECOVERABLE;
126 		}
127 	}
128 
129 	return ret;
130 }
131 
132 static int
133 _fd_group_add_all(int epfd, struct spdk_fd_group *grp)
134 {
135 	struct event_handler *ehdlr = NULL;
136 	struct epoll_event epevent = {0};
137 	int rc;
138 	int ret = 0;
139 
140 	/* Hoist the fds from the child up into the parent */
141 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
142 		epevent.events = ehdlr->events;
143 		epevent.data.ptr = ehdlr;
144 		rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent);
145 		if (rc < 0) {
146 			if (errno == EEXIST) {
147 				/* This is treated as success */
148 				continue;
149 			}
150 
151 			ret = -errno;
152 			SPDK_ERRLOG("Failed to add fd: %d to fd group: %s\n",
153 				    ehdlr->fd, strerror(errno));
154 			goto recover;
155 		}
156 		ret++;
157 	}
158 
159 	return ret;
160 
161 recover:
162 	/* We failed to add everything, so try to remove what we did add. */
163 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
164 		rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
165 		if (rc < 0) {
166 			if (errno == ENOENT) {
167 				/* This is treated as success. */
168 				continue;
169 			}
170 
171 
172 			/* Continue on even though we've failed. But indicate
173 			 * this is a fatal error. */
174 			SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno));
175 			ret = -ENOTRECOVERABLE;
176 		}
177 	}
178 
179 	return ret;
180 }
181 
182 int
183 spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
184 {
185 	int rc;
186 
187 	if (parent == NULL || child == NULL) {
188 		return -EINVAL;
189 	}
190 
191 	if (child->parent != parent) {
192 		return -EINVAL;
193 	}
194 
195 	rc = _fd_group_del_all(parent->epfd, child);
196 	if (rc < 0) {
197 		return rc;
198 	} else {
199 		assert(parent->num_fds >= (uint32_t)rc);
200 		parent->num_fds -= rc;
201 	}
202 
203 	child->parent = NULL;
204 
205 	rc = _fd_group_add_all(child->epfd, child);
206 	if (rc < 0) {
207 		return rc;
208 	} else {
209 		child->num_fds += rc;
210 	}
211 
212 	return 0;
213 }
214 
215 int
216 spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
217 {
218 	int rc;
219 
220 	if (parent == NULL || child == NULL) {
221 		return -EINVAL;
222 	}
223 
224 	if (child->parent) {
225 		return -EINVAL;
226 	}
227 
228 	if (parent->parent) {
229 		/* More than one layer of nesting is currently not supported */
230 		assert(false);
231 		return -ENOTSUP;
232 	}
233 
234 	rc = _fd_group_del_all(child->epfd, child);
235 	if (rc < 0) {
236 		return rc;
237 	} else {
238 		assert(child->num_fds >= (uint32_t)rc);
239 		child->num_fds -= rc;
240 	}
241 
242 	child->parent = parent;
243 
244 	rc =  _fd_group_add_all(parent->epfd, child);
245 	if (rc < 0) {
246 		return rc;
247 	} else {
248 		parent->num_fds += rc;
249 	}
250 
251 	return 0;
252 }
253 
254 int
255 spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn,
256 		  void *arg, const char *name)
257 {
258 	return spdk_fd_group_add_for_events(fgrp, efd, EPOLLIN, fn, arg, name);
259 }
260 
261 int
262 spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events,
263 			     spdk_fd_fn fn, void *arg, const char *name)
264 {
265 	struct event_handler *ehdlr = NULL;
266 	struct epoll_event epevent = {0};
267 	int rc;
268 	int epfd;
269 
270 	/* parameter checking */
271 	if (fgrp == NULL || efd < 0 || fn == NULL) {
272 		return -EINVAL;
273 	}
274 
275 	/* check if there is already one function registered for this fd */
276 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
277 		if (ehdlr->fd == efd) {
278 			return -EEXIST;
279 		}
280 	}
281 
282 	/* create a new event src */
283 	ehdlr = calloc(1, sizeof(*ehdlr));
284 	if (ehdlr == NULL) {
285 		return -errno;
286 	}
287 
288 	ehdlr->fd = efd;
289 	ehdlr->fn = fn;
290 	ehdlr->fn_arg = arg;
291 	ehdlr->state = EVENT_HANDLER_STATE_WAITING;
292 	ehdlr->events = events;
293 	snprintf(ehdlr->name, sizeof(ehdlr->name), "%s", name);
294 
295 	if (fgrp->parent) {
296 		epfd = fgrp->parent->epfd;
297 	} else {
298 		epfd = fgrp->epfd;
299 	}
300 
301 	epevent.events = ehdlr->events;
302 	epevent.data.ptr = ehdlr;
303 	rc = epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epevent);
304 	if (rc < 0) {
305 		SPDK_ERRLOG("Failed to add fd: %d to fd group(%p): %s\n",
306 			    efd, fgrp, strerror(errno));
307 		free(ehdlr);
308 		return -errno;
309 	}
310 
311 	TAILQ_INSERT_TAIL(&fgrp->event_handlers, ehdlr, next);
312 	if (fgrp->parent) {
313 		fgrp->parent->num_fds++;
314 	} else {
315 		fgrp->num_fds++;
316 	}
317 
318 	return 0;
319 }
320 
321 void
322 spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
323 {
324 	struct event_handler *ehdlr;
325 	int rc;
326 	int epfd;
327 
328 	if (fgrp == NULL || efd < 0) {
329 		SPDK_ERRLOG("Cannot remove fd: %d from fd group(%p)\n", efd, fgrp);
330 		assert(0);
331 		return;
332 	}
333 
334 
335 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
336 		if (ehdlr->fd == efd) {
337 			break;
338 		}
339 	}
340 
341 	if (ehdlr == NULL) {
342 		SPDK_ERRLOG("fd: %d doesn't exist in fd group(%p)\n", efd, fgrp);
343 		return;
344 	}
345 
346 	assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED);
347 
348 	if (fgrp->parent) {
349 		epfd = fgrp->parent->epfd;
350 	} else {
351 		epfd = fgrp->epfd;
352 	}
353 
354 	rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
355 	if (rc < 0) {
356 		SPDK_ERRLOG("Failed to remove fd: %d from fd group(%p): %s\n",
357 			    ehdlr->fd, fgrp, strerror(errno));
358 		return;
359 	}
360 
361 	if (fgrp->parent) {
362 		assert(fgrp->parent->num_fds > 0);
363 		fgrp->parent->num_fds--;
364 	} else {
365 		assert(fgrp->num_fds > 0);
366 		fgrp->num_fds--;
367 	}
368 	TAILQ_REMOVE(&fgrp->event_handlers, ehdlr, next);
369 
370 	/* Delay ehdlr's free in case it is waiting for execution in fgrp wait loop */
371 	if (ehdlr->state == EVENT_HANDLER_STATE_RUNNING) {
372 		ehdlr->state = EVENT_HANDLER_STATE_REMOVED;
373 	} else {
374 		free(ehdlr);
375 	}
376 }
377 
378 int
379 spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,
380 			   int efd, int event_types)
381 {
382 	struct epoll_event epevent;
383 	struct event_handler *ehdlr;
384 	int epfd;
385 
386 	if (fgrp == NULL || efd < 0) {
387 		return -EINVAL;
388 	}
389 
390 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
391 		if (ehdlr->fd == efd) {
392 			break;
393 		}
394 	}
395 
396 	if (ehdlr == NULL) {
397 		return -EINVAL;
398 	}
399 
400 	assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED);
401 
402 	ehdlr->events = event_types;
403 
404 	if (fgrp->parent) {
405 		epfd = fgrp->parent->epfd;
406 	} else {
407 		epfd = fgrp->epfd;
408 	}
409 
410 	epevent.events = ehdlr->events;
411 	epevent.data.ptr = ehdlr;
412 
413 	return epoll_ctl(epfd, EPOLL_CTL_MOD, ehdlr->fd, &epevent);
414 }
415 
416 int
417 spdk_fd_group_create(struct spdk_fd_group **_egrp)
418 {
419 	struct spdk_fd_group *fgrp;
420 
421 	if (_egrp == NULL) {
422 		return -EINVAL;
423 	}
424 
425 	fgrp = calloc(1, sizeof(*fgrp));
426 	if (fgrp == NULL) {
427 		return -ENOMEM;
428 	}
429 
430 	/* init the event source head */
431 	TAILQ_INIT(&fgrp->event_handlers);
432 
433 	fgrp->num_fds = 0;
434 	fgrp->epfd = epoll_create1(EPOLL_CLOEXEC);
435 	if (fgrp->epfd < 0) {
436 		free(fgrp);
437 		return -errno;
438 	}
439 
440 	*_egrp = fgrp;
441 
442 	return 0;
443 }
444 
445 void
446 spdk_fd_group_destroy(struct spdk_fd_group *fgrp)
447 {
448 	if (fgrp == NULL || fgrp->num_fds > 0) {
449 		if (!fgrp) {
450 			SPDK_ERRLOG("fd_group doesn't exist.\n");
451 		} else {
452 			SPDK_ERRLOG("Cannot delete fd group(%p) as (%u) fds are still registered to it.\n",
453 				    fgrp, fgrp->num_fds);
454 		}
455 		assert(0);
456 		return;
457 	}
458 
459 	/* Check if someone tried to delete the fd group before unnesting it */
460 	if (!TAILQ_EMPTY(&fgrp->event_handlers)) {
461 		SPDK_ERRLOG("Interrupt sources list not empty.\n");
462 		assert(0);
463 		return;
464 	}
465 
466 	close(fgrp->epfd);
467 	free(fgrp);
468 
469 	return;
470 }
471 
472 int
473 spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout)
474 {
475 	uint32_t totalfds = fgrp->num_fds;
476 	struct epoll_event events[totalfds];
477 	struct event_handler *ehdlr;
478 	int n;
479 	int nfds;
480 
481 	if (fgrp->parent != NULL) {
482 		if (timeout < 0) {
483 			SPDK_ERRLOG("Calling spdk_fd_group_wait on a group nested in another group without a timeout will block indefinitely.\n");
484 			assert(false);
485 			return -EINVAL;
486 		} else {
487 			SPDK_WARNLOG("Calling spdk_fd_group_wait on a group nested in another group will never find any events.\n");
488 			return 0;
489 		}
490 	}
491 
492 	nfds = epoll_wait(fgrp->epfd, events, totalfds, timeout);
493 	if (nfds < 0) {
494 		if (errno != EINTR) {
495 			SPDK_ERRLOG("fd group(%p) epoll_wait failed: %s\n",
496 				    fgrp, strerror(errno));
497 		}
498 
499 		return -errno;
500 	} else if (nfds == 0) {
501 		return 0;
502 	}
503 
504 	for (n = 0; n < nfds; n++) {
505 		/* find the event_handler */
506 		ehdlr = events[n].data.ptr;
507 
508 		if (ehdlr == NULL) {
509 			continue;
510 		}
511 
512 		/* Tag ehdlr as running state in case that it is removed
513 		 * during this wait loop but before or when it get executed.
514 		 */
515 		assert(ehdlr->state == EVENT_HANDLER_STATE_WAITING);
516 		ehdlr->state = EVENT_HANDLER_STATE_RUNNING;
517 	}
518 
519 	for (n = 0; n < nfds; n++) {
520 		/* find the event_handler */
521 		ehdlr = events[n].data.ptr;
522 
523 		if (ehdlr == NULL || ehdlr->fn == NULL) {
524 			continue;
525 		}
526 
527 		/* It is possible that the ehdlr was removed
528 		 * during this wait loop but before it get executed.
529 		 */
530 		if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) {
531 			free(ehdlr);
532 			continue;
533 		}
534 
535 		g_event = &events[n];
536 		/* call the interrupt response function */
537 		ehdlr->fn(ehdlr->fn_arg);
538 		g_event = NULL;
539 
540 		/* It is possible that the ehdlr was removed
541 		 * during this wait loop when it get executed.
542 		 */
543 		if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) {
544 			free(ehdlr);
545 		} else {
546 			ehdlr->state = EVENT_HANDLER_STATE_WAITING;
547 		}
548 	}
549 
550 	return nfds;
551 }
552 
553 #else
554 
555 int
556 spdk_fd_group_get_epoll_event(struct epoll_event *event)
557 {
558 	return -ENOTSUP;
559 }
560 
561 int
562 spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn,
563 		  void *arg, const char *name)
564 {
565 	return -ENOTSUP;
566 }
567 
568 int
569 spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events, spdk_fd_fn fn,
570 			     void *arg, const char *name)
571 {
572 	return -ENOTSUP;
573 }
574 
575 void
576 spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
577 {
578 }
579 
580 int
581 spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,
582 			   int efd, int event_types)
583 {
584 	return -ENOTSUP;
585 }
586 
587 int
588 spdk_fd_group_create(struct spdk_fd_group **fgrp)
589 {
590 	return -ENOTSUP;
591 }
592 
593 void
594 spdk_fd_group_destroy(struct spdk_fd_group *fgrp)
595 {
596 }
597 
598 int
599 spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout)
600 {
601 	return -ENOTSUP;
602 }
603 
604 int
605 spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
606 {
607 	return -ENOTSUP;
608 }
609 
610 int
611 spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
612 {
613 	return -ENOTSUP;
614 }
615 
616 #endif
617