xref: /spdk/lib/util/fd_group.c (revision 318515b44ec8b67f83bcc9ca83f0c7d5ea919e62)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2020 Intel Corporation. All rights reserved.
3  *   All rights reserved.
4  */
5 
6 #include "spdk_internal/usdt.h"
7 
8 #include "spdk/env.h"
9 #include "spdk/log.h"
10 #include "spdk/queue.h"
11 #include "spdk/util.h"
12 
13 #include "spdk/fd_group.h"
14 
/* Maximum number of characters kept for an event handler name; the name buffer
 * in struct event_handler reserves one extra byte for the terminating NUL.
 */
#define SPDK_MAX_EVENT_NAME_LEN 256
16 
/* Lifecycle of an event_handler with respect to spdk_fd_group_wait(). */
enum event_handler_state {
	/* The event_handler is added into an fd_group waiting for event,
	 * but not currently in the execution of a wait loop.
	 */
	EVENT_HANDLER_STATE_WAITING,

	/* The event_handler is currently in the execution of a wait loop. */
	EVENT_HANDLER_STATE_RUNNING,

	/* The event_handler was removed during the execution of a wait loop.
	 * Its memory is released by the wait loop rather than by the remover.
	 */
	EVENT_HANDLER_STATE_REMOVED,
};
29 
/* Per-fd interrupt source registered in an fd group. "ehdlr" is used throughout
 * this file as the short name for an event handler.
 */
struct event_handler {
	/* Linkage in the owning group's event_handlers list */
	TAILQ_ENTRY(event_handler)	next;
	/* Position in the wait-loop lifecycle, see enum event_handler_state */
	enum event_handler_state	state;

	/* Callback invoked from the wait loop, and the argument passed to it */
	spdk_fd_fn			fn;
	void				*fn_arg;
	/* file descriptor of the interrupt event */
	int				fd;
	/* epoll event mask this fd is registered with */
	uint32_t			events;
	/* SPDK_FD_TYPE_* value; SPDK_FD_TYPE_EVENTFD fds are read by the wait loop
	 * before the callback runs.
	 */
	uint32_t			fd_type;
	/* NUL-terminated copy of the name supplied at registration */
	char				name[SPDK_MAX_EVENT_NAME_LEN + 1];
};
43 
/* A set of file descriptors multiplexed through a single epoll instance. */
struct spdk_fd_group {
	/* epoll file descriptor owned by this group */
	int epfd;

	/* Number of fds registered in this group. The epoll file descriptor of this fd group
	 * i.e. epfd waits for interrupt event on all the fds from its interrupt sources list, as
	 * well as from all its children fd group interrupt sources list.
	 */
	uint32_t num_fds;

	/* Group this one is nested into, or NULL. While set, this group's handlers
	 * are registered on the parent's epfd instead of its own.
	 */
	struct spdk_fd_group *parent;

	/* interrupt sources list */
	TAILQ_HEAD(, event_handler) event_handlers;
};
58 
59 int
60 spdk_fd_group_get_fd(struct spdk_fd_group *fgrp)
61 {
62 	return fgrp->epfd;
63 }
64 
65 #ifdef __linux__
66 
/* Per-thread pointer to the epoll event currently being dispatched by
 * spdk_fd_group_wait(); NULL whenever no handler callback is executing.
 */
static __thread struct epoll_event *g_event = NULL;
68 
69 int
70 spdk_fd_group_get_epoll_event(struct epoll_event *event)
71 {
72 	if (g_event == NULL) {
73 		return -EINVAL;
74 	}
75 	*event = *g_event;
76 	return 0;
77 }
78 
79 static int
80 _fd_group_del_all(int epfd, struct spdk_fd_group *grp)
81 {
82 	struct event_handler *ehdlr = NULL;
83 	struct epoll_event epevent = {0};
84 	int rc;
85 	int ret = 0;
86 
87 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
88 		rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
89 		if (rc < 0) {
90 			if (errno == ENOENT) {
91 				/* This is treated as success. It happens if there are multiple
92 				 * attempts to remove fds from the group.
93 				 */
94 				continue;
95 			}
96 
97 			ret = -errno;
98 			SPDK_ERRLOG("Failed to remove fd: %d from group: %s\n",
99 				    ehdlr->fd, strerror(errno));
100 			goto recover;
101 		}
102 		ret++;
103 	}
104 
105 	return ret;
106 
107 recover:
108 	/* We failed to remove everything. Let's try to put everything back into
109 	 * the original group. */
110 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
111 		epevent.events = ehdlr->events;
112 		epevent.data.ptr = ehdlr;
113 		rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent);
114 		if (rc < 0) {
115 			if (errno == EEXIST) {
116 				/* This is fine. Keep going. */
117 				continue;
118 			}
119 
120 			/* Continue on even though we've failed. But indicate
121 			 * this is a fatal error. */
122 			SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno));
123 			ret = -ENOTRECOVERABLE;
124 		}
125 	}
126 
127 	return ret;
128 }
129 
130 static int
131 _fd_group_add_all(int epfd, struct spdk_fd_group *grp)
132 {
133 	struct event_handler *ehdlr = NULL;
134 	struct epoll_event epevent = {0};
135 	int rc;
136 	int ret = 0;
137 
138 	/* Hoist the fds from the child up into the parent */
139 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
140 		epevent.events = ehdlr->events;
141 		epevent.data.ptr = ehdlr;
142 		rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent);
143 		if (rc < 0) {
144 			if (errno == EEXIST) {
145 				/* This is treated as success */
146 				continue;
147 			}
148 
149 			ret = -errno;
150 			SPDK_ERRLOG("Failed to add fd: %d to fd group: %s\n",
151 				    ehdlr->fd, strerror(errno));
152 			goto recover;
153 		}
154 		ret++;
155 	}
156 
157 	return ret;
158 
159 recover:
160 	/* We failed to add everything, so try to remove what we did add. */
161 	TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) {
162 		rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
163 		if (rc < 0) {
164 			if (errno == ENOENT) {
165 				/* This is treated as success. */
166 				continue;
167 			}
168 
169 
170 			/* Continue on even though we've failed. But indicate
171 			 * this is a fatal error. */
172 			SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno));
173 			ret = -ENOTRECOVERABLE;
174 		}
175 	}
176 
177 	return ret;
178 }
179 
180 int
181 spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
182 {
183 	int rc;
184 
185 	if (parent == NULL || child == NULL) {
186 		return -EINVAL;
187 	}
188 
189 	if (child->parent != parent) {
190 		return -EINVAL;
191 	}
192 
193 	rc = _fd_group_del_all(parent->epfd, child);
194 	if (rc < 0) {
195 		return rc;
196 	} else {
197 		assert(parent->num_fds >= (uint32_t)rc);
198 		parent->num_fds -= rc;
199 	}
200 
201 	child->parent = NULL;
202 
203 	rc = _fd_group_add_all(child->epfd, child);
204 	if (rc < 0) {
205 		return rc;
206 	} else {
207 		child->num_fds += rc;
208 	}
209 
210 	return 0;
211 }
212 
213 int
214 spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
215 {
216 	int rc;
217 
218 	if (parent == NULL || child == NULL) {
219 		return -EINVAL;
220 	}
221 
222 	if (child->parent) {
223 		return -EINVAL;
224 	}
225 
226 	if (parent->parent) {
227 		/* More than one layer of nesting is currently not supported */
228 		assert(false);
229 		return -ENOTSUP;
230 	}
231 
232 	rc = _fd_group_del_all(child->epfd, child);
233 	if (rc < 0) {
234 		return rc;
235 	} else {
236 		assert(child->num_fds >= (uint32_t)rc);
237 		child->num_fds -= rc;
238 	}
239 
240 	child->parent = parent;
241 
242 	rc =  _fd_group_add_all(parent->epfd, child);
243 	if (rc < 0) {
244 		return rc;
245 	} else {
246 		parent->num_fds += rc;
247 	}
248 
249 	return 0;
250 }
251 
252 void
253 spdk_fd_group_get_default_event_handler_opts(struct spdk_event_handler_opts *opts,
254 		size_t opts_size)
255 {
256 	if (!opts) {
257 		SPDK_ERRLOG("opts should not be NULL\n");
258 		return;
259 	}
260 
261 	if (!opts_size) {
262 		SPDK_ERRLOG("opts_size should not be zero value\n");
263 		return;
264 	}
265 
266 	memset(opts, 0, opts_size);
267 	opts->opts_size = opts_size;
268 
269 #define FIELD_OK(field) \
270         offsetof(struct spdk_event_handler_opts, field) + sizeof(opts->field) <= opts_size
271 
272 #define SET_FIELD(field, value) \
273         if (FIELD_OK(field)) { \
274                 opts->field = value; \
275         } \
276 
277 	SET_FIELD(events, EPOLLIN);
278 	SET_FIELD(fd_type, SPDK_FD_TYPE_DEFAULT);
279 
280 #undef FIELD_OK
281 #undef SET_FIELD
282 }
283 
284 static void
285 event_handler_opts_copy(const struct spdk_event_handler_opts *src,
286 			struct spdk_event_handler_opts *dst)
287 {
288 	if (!src->opts_size) {
289 		SPDK_ERRLOG("opts_size should not be zero value\n");
290 		assert(false);
291 	}
292 
293 #define FIELD_OK(field) \
294         offsetof(struct spdk_event_handler_opts, field) + sizeof(src->field) <= src->opts_size
295 
296 #define SET_FIELD(field) \
297         if (FIELD_OK(field)) { \
298                 dst->field = src->field; \
299         } \
300 
301 	SET_FIELD(events);
302 	SET_FIELD(fd_type);
303 
304 	dst->opts_size = src->opts_size;
305 
306 	/* You should not remove this statement, but need to update the assert statement
307 	 * if you add a new field, and also add a corresponding SET_FIELD statement */
308 	SPDK_STATIC_ASSERT(sizeof(struct spdk_event_handler_opts) == 16, "Incorrect size");
309 
310 #undef FIELD_OK
311 #undef SET_FIELD
312 }
313 
314 int
315 spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn,
316 		  void *arg, const char *name)
317 {
318 	return spdk_fd_group_add_for_events(fgrp, efd, EPOLLIN, fn, arg, name);
319 }
320 
321 int
322 spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events,
323 			     spdk_fd_fn fn, void *arg, const char *name)
324 {
325 	struct spdk_event_handler_opts opts = {};
326 
327 	spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts));
328 	opts.events = events;
329 	opts.fd_type = SPDK_FD_TYPE_DEFAULT;
330 
331 	return spdk_fd_group_add_ext(fgrp, efd, fn, arg, name, &opts);
332 }
333 
334 int
335 spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *arg,
336 		      const char *name, struct spdk_event_handler_opts *opts)
337 {
338 	struct event_handler *ehdlr = NULL;
339 	struct epoll_event epevent = {0};
340 	struct spdk_event_handler_opts eh_opts = {};
341 	int rc;
342 	int epfd;
343 
344 	/* parameter checking */
345 	if (fgrp == NULL || efd < 0 || fn == NULL) {
346 		return -EINVAL;
347 	}
348 
349 	spdk_fd_group_get_default_event_handler_opts(&eh_opts, sizeof(eh_opts));
350 	if (opts) {
351 		event_handler_opts_copy(opts, &eh_opts);
352 	}
353 
354 	/* check if there is already one function registered for this fd */
355 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
356 		if (ehdlr->fd == efd) {
357 			return -EEXIST;
358 		}
359 	}
360 
361 	/* create a new event src */
362 	ehdlr = calloc(1, sizeof(*ehdlr));
363 	if (ehdlr == NULL) {
364 		return -errno;
365 	}
366 
367 	ehdlr->fd = efd;
368 	ehdlr->fn = fn;
369 	ehdlr->fn_arg = arg;
370 	ehdlr->state = EVENT_HANDLER_STATE_WAITING;
371 	ehdlr->events = eh_opts.events;
372 	ehdlr->fd_type = eh_opts.fd_type;
373 	snprintf(ehdlr->name, sizeof(ehdlr->name), "%s", name);
374 
375 	if (fgrp->parent) {
376 		epfd = fgrp->parent->epfd;
377 	} else {
378 		epfd = fgrp->epfd;
379 	}
380 
381 	epevent.events = ehdlr->events;
382 	epevent.data.ptr = ehdlr;
383 	rc = epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epevent);
384 	if (rc < 0) {
385 		SPDK_ERRLOG("Failed to add fd: %d to fd group(%p): %s\n",
386 			    efd, fgrp, strerror(errno));
387 		free(ehdlr);
388 		return -errno;
389 	}
390 
391 	TAILQ_INSERT_TAIL(&fgrp->event_handlers, ehdlr, next);
392 	if (fgrp->parent) {
393 		fgrp->parent->num_fds++;
394 	} else {
395 		fgrp->num_fds++;
396 	}
397 
398 	return 0;
399 }
400 
401 void
402 spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
403 {
404 	struct event_handler *ehdlr;
405 	int rc;
406 	int epfd;
407 
408 	if (fgrp == NULL || efd < 0) {
409 		SPDK_ERRLOG("Cannot remove fd: %d from fd group(%p)\n", efd, fgrp);
410 		assert(0);
411 		return;
412 	}
413 
414 
415 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
416 		if (ehdlr->fd == efd) {
417 			break;
418 		}
419 	}
420 
421 	if (ehdlr == NULL) {
422 		SPDK_ERRLOG("fd: %d doesn't exist in fd group(%p)\n", efd, fgrp);
423 		return;
424 	}
425 
426 	assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED);
427 
428 	if (fgrp->parent) {
429 		epfd = fgrp->parent->epfd;
430 	} else {
431 		epfd = fgrp->epfd;
432 	}
433 
434 	rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
435 	if (rc < 0) {
436 		SPDK_ERRLOG("Failed to remove fd: %d from fd group(%p): %s\n",
437 			    ehdlr->fd, fgrp, strerror(errno));
438 		return;
439 	}
440 
441 	if (fgrp->parent) {
442 		assert(fgrp->parent->num_fds > 0);
443 		fgrp->parent->num_fds--;
444 	} else {
445 		assert(fgrp->num_fds > 0);
446 		fgrp->num_fds--;
447 	}
448 	TAILQ_REMOVE(&fgrp->event_handlers, ehdlr, next);
449 
450 	/* Delay ehdlr's free in case it is waiting for execution in fgrp wait loop */
451 	if (ehdlr->state == EVENT_HANDLER_STATE_RUNNING) {
452 		ehdlr->state = EVENT_HANDLER_STATE_REMOVED;
453 	} else {
454 		free(ehdlr);
455 	}
456 }
457 
458 int
459 spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,
460 			   int efd, int event_types)
461 {
462 	struct epoll_event epevent;
463 	struct event_handler *ehdlr;
464 	int epfd;
465 
466 	if (fgrp == NULL || efd < 0) {
467 		return -EINVAL;
468 	}
469 
470 	TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) {
471 		if (ehdlr->fd == efd) {
472 			break;
473 		}
474 	}
475 
476 	if (ehdlr == NULL) {
477 		return -EINVAL;
478 	}
479 
480 	assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED);
481 
482 	ehdlr->events = event_types;
483 
484 	if (fgrp->parent) {
485 		epfd = fgrp->parent->epfd;
486 	} else {
487 		epfd = fgrp->epfd;
488 	}
489 
490 	epevent.events = ehdlr->events;
491 	epevent.data.ptr = ehdlr;
492 
493 	return epoll_ctl(epfd, EPOLL_CTL_MOD, ehdlr->fd, &epevent);
494 }
495 
496 int
497 spdk_fd_group_create(struct spdk_fd_group **_egrp)
498 {
499 	struct spdk_fd_group *fgrp;
500 
501 	if (_egrp == NULL) {
502 		return -EINVAL;
503 	}
504 
505 	fgrp = calloc(1, sizeof(*fgrp));
506 	if (fgrp == NULL) {
507 		return -ENOMEM;
508 	}
509 
510 	/* init the event source head */
511 	TAILQ_INIT(&fgrp->event_handlers);
512 
513 	fgrp->num_fds = 0;
514 	fgrp->epfd = epoll_create1(EPOLL_CLOEXEC);
515 	if (fgrp->epfd < 0) {
516 		free(fgrp);
517 		return -errno;
518 	}
519 
520 	*_egrp = fgrp;
521 
522 	return 0;
523 }
524 
525 void
526 spdk_fd_group_destroy(struct spdk_fd_group *fgrp)
527 {
528 	if (fgrp == NULL || fgrp->num_fds > 0) {
529 		if (!fgrp) {
530 			SPDK_ERRLOG("fd_group doesn't exist.\n");
531 		} else {
532 			SPDK_ERRLOG("Cannot delete fd group(%p) as (%u) fds are still registered to it.\n",
533 				    fgrp, fgrp->num_fds);
534 		}
535 		assert(0);
536 		return;
537 	}
538 
539 	/* Check if someone tried to delete the fd group before unnesting it */
540 	if (!TAILQ_EMPTY(&fgrp->event_handlers)) {
541 		SPDK_ERRLOG("Interrupt sources list not empty.\n");
542 		assert(0);
543 		return;
544 	}
545 
546 	close(fgrp->epfd);
547 	free(fgrp);
548 
549 	return;
550 }
551 
552 int
553 spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout)
554 {
555 	uint32_t totalfds = fgrp->num_fds;
556 	struct epoll_event events[totalfds];
557 	struct event_handler *ehdlr;
558 	uint64_t count;
559 	int n;
560 	int nfds;
561 	int bytes_read;
562 	int read_errno;
563 
564 	if (fgrp->parent != NULL) {
565 		if (timeout < 0) {
566 			SPDK_ERRLOG("Calling spdk_fd_group_wait on a group nested in another group without a timeout will block indefinitely.\n");
567 			assert(false);
568 			return -EINVAL;
569 		} else {
570 			SPDK_WARNLOG("Calling spdk_fd_group_wait on a group nested in another group will never find any events.\n");
571 			return 0;
572 		}
573 	}
574 
575 	nfds = epoll_wait(fgrp->epfd, events, totalfds, timeout);
576 	if (nfds < 0) {
577 		if (errno != EINTR) {
578 			SPDK_ERRLOG("fd group(%p) epoll_wait failed: %s\n",
579 				    fgrp, strerror(errno));
580 		}
581 
582 		return -errno;
583 	} else if (nfds == 0) {
584 		return 0;
585 	}
586 
587 	for (n = 0; n < nfds; n++) {
588 		/* find the event_handler */
589 		ehdlr = events[n].data.ptr;
590 
591 		if (ehdlr == NULL) {
592 			continue;
593 		}
594 
595 		/* Tag ehdlr as running state in case that it is removed
596 		 * during this wait loop but before or when it get executed.
597 		 */
598 		assert(ehdlr->state == EVENT_HANDLER_STATE_WAITING);
599 		ehdlr->state = EVENT_HANDLER_STATE_RUNNING;
600 	}
601 
602 	for (n = 0; n < nfds; n++) {
603 		/* find the event_handler */
604 		ehdlr = events[n].data.ptr;
605 
606 		if (ehdlr == NULL || ehdlr->fn == NULL) {
607 			continue;
608 		}
609 
610 		/* It is possible that the ehdlr was removed
611 		 * during this wait loop but before it get executed.
612 		 */
613 		if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) {
614 			free(ehdlr);
615 			continue;
616 		}
617 
618 		g_event = &events[n];
619 
620 		/* read fd to reset the internal eventfd object counter value to 0 */
621 		if (ehdlr->fd_type == SPDK_FD_TYPE_EVENTFD) {
622 			bytes_read = read(ehdlr->fd, &count, sizeof(count));
623 			if (bytes_read < 0) {
624 				g_event = NULL;
625 				if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) {
626 					continue;
627 				}
628 				read_errno = errno;
629 				/* TODO: Device is buggy. Handle this properly */
630 				SPDK_ERRLOG("Failed to read fd (%d) %s\n",
631 					    ehdlr->fd, strerror(errno));
632 				return -read_errno;
633 			} else if (bytes_read == 0) {
634 				SPDK_ERRLOG("Read nothing from fd (%d)\n", ehdlr->fd);
635 				g_event = NULL;
636 				return -EINVAL;
637 			}
638 		}
639 
640 		/* call the interrupt response function */
641 		ehdlr->fn(ehdlr->fn_arg);
642 		g_event = NULL;
643 
644 		/* It is possible that the ehdlr was removed
645 		 * during this wait loop when it get executed.
646 		 */
647 		if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) {
648 			free(ehdlr);
649 		} else {
650 			ehdlr->state = EVENT_HANDLER_STATE_WAITING;
651 		}
652 	}
653 
654 	return nfds;
655 }
656 
657 #else /* !__linux__ */
658 
/* Non-Linux stub: the implementation is epoll based, so this always returns -ENOTSUP. */
int
spdk_fd_group_get_epoll_event(struct epoll_event *event)
{
	return -ENOTSUP;
}
664 
/* Non-Linux stub: nothing is registered; always returns -ENOTSUP. */
int
spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn,
		  void *arg, const char *name)
{
	return -ENOTSUP;
}
671 
/* Non-Linux stub: nothing is registered; always returns -ENOTSUP. */
int
spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events, spdk_fd_fn fn,
			     void *arg, const char *name)
{
	return -ENOTSUP;
}
678 
/* Non-Linux stub: nothing is registered; always returns -ENOTSUP. */
int
spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *arg,
		      const char *name, struct spdk_event_handler_opts *opts)
{
	return -ENOTSUP;
}
685 
/* Non-Linux stub: opts is left untouched and an assert trips, since event
 * handler options cannot be used without the Linux implementation.
 */
void
spdk_fd_group_get_default_event_handler_opts(struct spdk_event_handler_opts *opts,
		size_t opts_size)
{
	assert(false);
}
692 
/* Non-Linux stub: does nothing. */
void
spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
{
}
697 
/* Non-Linux stub: nothing is modified; always returns -ENOTSUP. */
int
spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,
			   int efd, int event_types)
{
	return -ENOTSUP;
}
704 
/* Non-Linux stub: no group is created and *fgrp is not written; always returns -ENOTSUP. */
int
spdk_fd_group_create(struct spdk_fd_group **fgrp)
{
	return -ENOTSUP;
}
710 
/* Non-Linux stub: does nothing. */
void
spdk_fd_group_destroy(struct spdk_fd_group *fgrp)
{
}
715 
/* Non-Linux stub: never waits; always returns -ENOTSUP. */
int
spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout)
{
	return -ENOTSUP;
}
721 
/* Non-Linux stub: nesting is unavailable; always returns -ENOTSUP. */
int
spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
{
	return -ENOTSUP;
}
727 
/* Non-Linux stub: nesting is unavailable; always returns -ENOTSUP. */
int
spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
{
	return -ENOTSUP;
}
733 
734 #endif /* __linux__ */
735