1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2020 Intel Corporation. All rights reserved. 3 * All rights reserved. 4 */ 5 6 #include "spdk_internal/usdt.h" 7 8 #include "spdk/env.h" 9 #include "spdk/log.h" 10 #include "spdk/queue.h" 11 #include "spdk/util.h" 12 13 #include "spdk/fd_group.h" 14 15 #define SPDK_MAX_EVENT_NAME_LEN 256 16 17 enum event_handler_state { 18 /* The event_handler is added into an fd_group waiting for event, 19 * but not currently in the execution of a wait loop. 20 */ 21 EVENT_HANDLER_STATE_WAITING, 22 23 /* The event_handler is currently in the execution of a wait loop. */ 24 EVENT_HANDLER_STATE_RUNNING, 25 26 /* The event_handler was removed during the execution of a wait loop. */ 27 EVENT_HANDLER_STATE_REMOVED, 28 }; 29 30 /* Taking "ehdlr" as short name for file descriptor handler of the interrupt event. */ 31 struct event_handler { 32 TAILQ_ENTRY(event_handler) next; 33 enum event_handler_state state; 34 35 spdk_fd_fn fn; 36 void *fn_arg; 37 /* file descriptor of the interrupt event */ 38 int fd; 39 uint32_t events; 40 uint32_t fd_type; 41 char name[SPDK_MAX_EVENT_NAME_LEN + 1]; 42 }; 43 44 struct spdk_fd_group { 45 int epfd; 46 47 /* Number of fds registered in this group. The epoll file descriptor of this fd group 48 * i.e. epfd waits for interrupt event on all the fds from its interrupt sources list, as 49 * well as from all its children fd group interrupt sources list. 50 */ 51 uint32_t num_fds; 52 53 struct spdk_fd_group *parent; 54 55 /* interrupt sources list */ 56 TAILQ_HEAD(, event_handler) event_handlers; 57 }; 58 59 int 60 spdk_fd_group_get_fd(struct spdk_fd_group *fgrp) 61 { 62 return fgrp->epfd; 63 } 64 65 #ifdef __linux__ 66 67 static __thread struct epoll_event *g_event = NULL; 68 69 int 70 spdk_fd_group_get_epoll_event(struct epoll_event *event) 71 { 72 if (g_event == NULL) { 73 return -EINVAL; 74 } 75 *event = *g_event; 76 return 0; 77 } 78 79 static int 80 _fd_group_del_all(int epfd, struct spdk_fd_group *grp) 81 { 82 struct event_handler *ehdlr = NULL; 83 struct epoll_event epevent = {0}; 84 int rc; 85 int ret = 0; 86 87 TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) { 88 rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL); 89 if (rc < 0) { 90 if (errno == ENOENT) { 91 /* This is treated as success. It happens if there are multiple 92 * attempts to remove fds from the group. 93 */ 94 continue; 95 } 96 97 ret = -errno; 98 SPDK_ERRLOG("Failed to remove fd: %d from group: %s\n", 99 ehdlr->fd, strerror(errno)); 100 goto recover; 101 } 102 ret++; 103 } 104 105 return ret; 106 107 recover: 108 /* We failed to remove everything. Let's try to put everything back into 109 * the original group. */ 110 TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) { 111 epevent.events = ehdlr->events; 112 epevent.data.ptr = ehdlr; 113 rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent); 114 if (rc < 0) { 115 if (errno == EEXIST) { 116 /* This is fine. Keep going. */ 117 continue; 118 } 119 120 /* Continue on even though we've failed. But indicate 121 * this is a fatal error. */ 122 SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno)); 123 ret = -ENOTRECOVERABLE; 124 } 125 } 126 127 return ret; 128 } 129 130 static int 131 _fd_group_add_all(int epfd, struct spdk_fd_group *grp) 132 { 133 struct event_handler *ehdlr = NULL; 134 struct epoll_event epevent = {0}; 135 int rc; 136 int ret = 0; 137 138 /* Hoist the fds from the child up into the parent */ 139 TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) { 140 epevent.events = ehdlr->events; 141 epevent.data.ptr = ehdlr; 142 rc = epoll_ctl(epfd, EPOLL_CTL_ADD, ehdlr->fd, &epevent); 143 if (rc < 0) { 144 if (errno == EEXIST) { 145 /* This is treated as success */ 146 continue; 147 } 148 149 ret = -errno; 150 SPDK_ERRLOG("Failed to add fd: %d to fd group: %s\n", 151 ehdlr->fd, strerror(errno)); 152 goto recover; 153 } 154 ret++; 155 } 156 157 return ret; 158 159 recover: 160 /* We failed to add everything, so try to remove what we did add. */ 161 TAILQ_FOREACH(ehdlr, &grp->event_handlers, next) { 162 rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL); 163 if (rc < 0) { 164 if (errno == ENOENT) { 165 /* This is treated as success. */ 166 continue; 167 } 168 169 170 /* Continue on even though we've failed. But indicate 171 * this is a fatal error. */ 172 SPDK_ERRLOG("Failed to recover fd_group_del_all: %s\n", strerror(errno)); 173 ret = -ENOTRECOVERABLE; 174 } 175 } 176 177 return ret; 178 } 179 180 int 181 spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child) 182 { 183 int rc; 184 185 if (parent == NULL || child == NULL) { 186 return -EINVAL; 187 } 188 189 if (child->parent != parent) { 190 return -EINVAL; 191 } 192 193 rc = _fd_group_del_all(parent->epfd, child); 194 if (rc < 0) { 195 return rc; 196 } else { 197 assert(parent->num_fds >= (uint32_t)rc); 198 parent->num_fds -= rc; 199 } 200 201 child->parent = NULL; 202 203 rc = _fd_group_add_all(child->epfd, child); 204 if (rc < 0) { 205 return rc; 206 } else { 207 child->num_fds += rc; 208 } 209 210 return 0; 211 } 212 213 int 214 spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child) 215 { 216 int rc; 217 218 if (parent == NULL || child == NULL) { 219 return -EINVAL; 220 } 221 222 if (child->parent) { 223 return -EINVAL; 224 } 225 226 if (parent->parent) { 227 /* More than one layer of nesting is currently not supported */ 228 assert(false); 229 return -ENOTSUP; 230 } 231 232 rc = _fd_group_del_all(child->epfd, child); 233 if (rc < 0) { 234 return rc; 235 } else { 236 assert(child->num_fds >= (uint32_t)rc); 237 child->num_fds -= rc; 238 } 239 240 child->parent = parent; 241 242 rc = _fd_group_add_all(parent->epfd, child); 243 if (rc < 0) { 244 return rc; 245 } else { 246 parent->num_fds += rc; 247 } 248 249 return 0; 250 } 251 252 void 253 spdk_fd_group_get_default_event_handler_opts(struct spdk_event_handler_opts *opts, 254 size_t opts_size) 255 { 256 if (!opts) { 257 SPDK_ERRLOG("opts should not be NULL\n"); 258 return; 259 } 260 261 if (!opts_size) { 262 SPDK_ERRLOG("opts_size should not be zero value\n"); 263 return; 264 } 265 266 memset(opts, 0, opts_size); 267 opts->opts_size = opts_size; 268 269 #define FIELD_OK(field) \ 270 offsetof(struct spdk_event_handler_opts, field) + sizeof(opts->field) <= opts_size 271 272 #define SET_FIELD(field, value) \ 273 if (FIELD_OK(field)) { \ 274 opts->field = value; \ 275 } \ 276 277 SET_FIELD(events, EPOLLIN); 278 SET_FIELD(fd_type, SPDK_FD_TYPE_DEFAULT); 279 280 #undef FIELD_OK 281 #undef SET_FIELD 282 } 283 284 static void 285 event_handler_opts_copy(const struct spdk_event_handler_opts *src, 286 struct spdk_event_handler_opts *dst) 287 { 288 if (!src->opts_size) { 289 SPDK_ERRLOG("opts_size should not be zero value\n"); 290 assert(false); 291 } 292 293 #define FIELD_OK(field) \ 294 offsetof(struct spdk_event_handler_opts, field) + sizeof(src->field) <= src->opts_size 295 296 #define SET_FIELD(field) \ 297 if (FIELD_OK(field)) { \ 298 dst->field = src->field; \ 299 } \ 300 301 SET_FIELD(events); 302 SET_FIELD(fd_type); 303 304 dst->opts_size = src->opts_size; 305 306 /* You should not remove this statement, but need to update the assert statement 307 * if you add a new field, and also add a corresponding SET_FIELD statement */ 308 SPDK_STATIC_ASSERT(sizeof(struct spdk_event_handler_opts) == 16, "Incorrect size"); 309 310 #undef FIELD_OK 311 #undef SET_FIELD 312 } 313 314 int 315 spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, 316 void *arg, const char *name) 317 { 318 return spdk_fd_group_add_for_events(fgrp, efd, EPOLLIN, fn, arg, name); 319 } 320 321 int 322 spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events, 323 spdk_fd_fn fn, void *arg, const char *name) 324 { 325 struct spdk_event_handler_opts opts = {}; 326 327 spdk_fd_group_get_default_event_handler_opts(&opts, sizeof(opts)); 328 opts.events = events; 329 opts.fd_type = SPDK_FD_TYPE_DEFAULT; 330 331 return spdk_fd_group_add_ext(fgrp, efd, fn, arg, name, &opts); 332 } 333 334 int 335 spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *arg, 336 const char *name, struct spdk_event_handler_opts *opts) 337 { 338 struct event_handler *ehdlr = NULL; 339 struct epoll_event epevent = {0}; 340 struct spdk_event_handler_opts eh_opts = {}; 341 int rc; 342 int epfd; 343 344 /* parameter checking */ 345 if (fgrp == NULL || efd < 0 || fn == NULL) { 346 return -EINVAL; 347 } 348 349 spdk_fd_group_get_default_event_handler_opts(&eh_opts, sizeof(eh_opts)); 350 if (opts) { 351 event_handler_opts_copy(opts, &eh_opts); 352 } 353 354 /* check if there is already one function registered for this fd */ 355 TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) { 356 if (ehdlr->fd == efd) { 357 return -EEXIST; 358 } 359 } 360 361 /* create a new event src */ 362 ehdlr = calloc(1, sizeof(*ehdlr)); 363 if (ehdlr == NULL) { 364 return -errno; 365 } 366 367 ehdlr->fd = efd; 368 ehdlr->fn = fn; 369 ehdlr->fn_arg = arg; 370 ehdlr->state = EVENT_HANDLER_STATE_WAITING; 371 ehdlr->events = eh_opts.events; 372 ehdlr->fd_type = eh_opts.fd_type; 373 snprintf(ehdlr->name, sizeof(ehdlr->name), "%s", name); 374 375 if (fgrp->parent) { 376 epfd = fgrp->parent->epfd; 377 } else { 378 epfd = fgrp->epfd; 379 } 380 381 epevent.events = ehdlr->events; 382 epevent.data.ptr = ehdlr; 383 rc = epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epevent); 384 if (rc < 0) { 385 SPDK_ERRLOG("Failed to add fd: %d to fd group(%p): %s\n", 386 efd, fgrp, strerror(errno)); 387 free(ehdlr); 388 return -errno; 389 } 390 391 TAILQ_INSERT_TAIL(&fgrp->event_handlers, ehdlr, next); 392 if (fgrp->parent) { 393 fgrp->parent->num_fds++; 394 } else { 395 fgrp->num_fds++; 396 } 397 398 return 0; 399 } 400 401 void 402 spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd) 403 { 404 struct event_handler *ehdlr; 405 int rc; 406 int epfd; 407 408 if (fgrp == NULL || efd < 0) { 409 SPDK_ERRLOG("Cannot remove fd: %d from fd group(%p)\n", efd, fgrp); 410 assert(0); 411 return; 412 } 413 414 415 TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) { 416 if (ehdlr->fd == efd) { 417 break; 418 } 419 } 420 421 if (ehdlr == NULL) { 422 SPDK_ERRLOG("fd: %d doesn't exist in fd group(%p)\n", efd, fgrp); 423 return; 424 } 425 426 assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED); 427 428 if (fgrp->parent) { 429 epfd = fgrp->parent->epfd; 430 } else { 431 epfd = fgrp->epfd; 432 } 433 434 rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL); 435 if (rc < 0) { 436 SPDK_ERRLOG("Failed to remove fd: %d from fd group(%p): %s\n", 437 ehdlr->fd, fgrp, strerror(errno)); 438 return; 439 } 440 441 if (fgrp->parent) { 442 assert(fgrp->parent->num_fds > 0); 443 fgrp->parent->num_fds--; 444 } else { 445 assert(fgrp->num_fds > 0); 446 fgrp->num_fds--; 447 } 448 TAILQ_REMOVE(&fgrp->event_handlers, ehdlr, next); 449 450 /* Delay ehdlr's free in case it is waiting for execution in fgrp wait loop */ 451 if (ehdlr->state == EVENT_HANDLER_STATE_RUNNING) { 452 ehdlr->state = EVENT_HANDLER_STATE_REMOVED; 453 } else { 454 free(ehdlr); 455 } 456 } 457 458 int 459 spdk_fd_group_event_modify(struct spdk_fd_group *fgrp, 460 int efd, int event_types) 461 { 462 struct epoll_event epevent; 463 struct event_handler *ehdlr; 464 int epfd; 465 466 if (fgrp == NULL || efd < 0) { 467 return -EINVAL; 468 } 469 470 TAILQ_FOREACH(ehdlr, &fgrp->event_handlers, next) { 471 if (ehdlr->fd == efd) { 472 break; 473 } 474 } 475 476 if (ehdlr == NULL) { 477 return -EINVAL; 478 } 479 480 assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED); 481 482 ehdlr->events = event_types; 483 484 if (fgrp->parent) { 485 epfd = fgrp->parent->epfd; 486 } else { 487 epfd = fgrp->epfd; 488 } 489 490 epevent.events = ehdlr->events; 491 epevent.data.ptr = ehdlr; 492 493 return epoll_ctl(epfd, EPOLL_CTL_MOD, ehdlr->fd, &epevent); 494 } 495 496 int 497 spdk_fd_group_create(struct spdk_fd_group **_egrp) 498 { 499 struct spdk_fd_group *fgrp; 500 501 if (_egrp == NULL) { 502 return -EINVAL; 503 } 504 505 fgrp = calloc(1, sizeof(*fgrp)); 506 if (fgrp == NULL) { 507 return -ENOMEM; 508 } 509 510 /* init the event source head */ 511 TAILQ_INIT(&fgrp->event_handlers); 512 513 fgrp->num_fds = 0; 514 fgrp->epfd = epoll_create1(EPOLL_CLOEXEC); 515 if (fgrp->epfd < 0) { 516 free(fgrp); 517 return -errno; 518 } 519 520 *_egrp = fgrp; 521 522 return 0; 523 } 524 525 void 526 spdk_fd_group_destroy(struct spdk_fd_group *fgrp) 527 { 528 if (fgrp == NULL || fgrp->num_fds > 0) { 529 if (!fgrp) { 530 SPDK_ERRLOG("fd_group doesn't exist.\n"); 531 } else { 532 SPDK_ERRLOG("Cannot delete fd group(%p) as (%u) fds are still registered to it.\n", 533 fgrp, fgrp->num_fds); 534 } 535 assert(0); 536 return; 537 } 538 539 /* Check if someone tried to delete the fd group before unnesting it */ 540 if (!TAILQ_EMPTY(&fgrp->event_handlers)) { 541 SPDK_ERRLOG("Interrupt sources list not empty.\n"); 542 assert(0); 543 return; 544 } 545 546 close(fgrp->epfd); 547 free(fgrp); 548 549 return; 550 } 551 552 int 553 spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout) 554 { 555 uint32_t totalfds = fgrp->num_fds; 556 struct epoll_event events[totalfds]; 557 struct event_handler *ehdlr; 558 uint64_t count; 559 int n; 560 int nfds; 561 int bytes_read; 562 int read_errno; 563 564 if (fgrp->parent != NULL) { 565 if (timeout < 0) { 566 SPDK_ERRLOG("Calling spdk_fd_group_wait on a group nested in another group without a timeout will block indefinitely.\n"); 567 assert(false); 568 return -EINVAL; 569 } else { 570 SPDK_WARNLOG("Calling spdk_fd_group_wait on a group nested in another group will never find any events.\n"); 571 return 0; 572 } 573 } 574 575 nfds = epoll_wait(fgrp->epfd, events, totalfds, timeout); 576 if (nfds < 0) { 577 if (errno != EINTR) { 578 SPDK_ERRLOG("fd group(%p) epoll_wait failed: %s\n", 579 fgrp, strerror(errno)); 580 } 581 582 return -errno; 583 } else if (nfds == 0) { 584 return 0; 585 } 586 587 for (n = 0; n < nfds; n++) { 588 /* find the event_handler */ 589 ehdlr = events[n].data.ptr; 590 591 if (ehdlr == NULL) { 592 continue; 593 } 594 595 /* Tag ehdlr as running state in case that it is removed 596 * during this wait loop but before or when it get executed. 597 */ 598 assert(ehdlr->state == EVENT_HANDLER_STATE_WAITING); 599 ehdlr->state = EVENT_HANDLER_STATE_RUNNING; 600 } 601 602 for (n = 0; n < nfds; n++) { 603 /* find the event_handler */ 604 ehdlr = events[n].data.ptr; 605 606 if (ehdlr == NULL || ehdlr->fn == NULL) { 607 continue; 608 } 609 610 /* It is possible that the ehdlr was removed 611 * during this wait loop but before it get executed. 612 */ 613 if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) { 614 free(ehdlr); 615 continue; 616 } 617 618 g_event = &events[n]; 619 620 /* read fd to reset the internal eventfd object counter value to 0 */ 621 if (ehdlr->fd_type == SPDK_FD_TYPE_EVENTFD) { 622 bytes_read = read(ehdlr->fd, &count, sizeof(count)); 623 if (bytes_read < 0) { 624 g_event = NULL; 625 if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) { 626 continue; 627 } 628 read_errno = errno; 629 /* TODO: Device is buggy. Handle this properly */ 630 SPDK_ERRLOG("Failed to read fd (%d) %s\n", 631 ehdlr->fd, strerror(errno)); 632 return -read_errno; 633 } else if (bytes_read == 0) { 634 SPDK_ERRLOG("Read nothing from fd (%d)\n", ehdlr->fd); 635 g_event = NULL; 636 return -EINVAL; 637 } 638 } 639 640 /* call the interrupt response function */ 641 ehdlr->fn(ehdlr->fn_arg); 642 g_event = NULL; 643 644 /* It is possible that the ehdlr was removed 645 * during this wait loop when it get executed. 646 */ 647 if (ehdlr->state == EVENT_HANDLER_STATE_REMOVED) { 648 free(ehdlr); 649 } else { 650 ehdlr->state = EVENT_HANDLER_STATE_WAITING; 651 } 652 } 653 654 return nfds; 655 } 656 657 #else /* !__linux__ */ 658 659 int 660 spdk_fd_group_get_epoll_event(struct epoll_event *event) 661 { 662 return -ENOTSUP; 663 } 664 665 int 666 spdk_fd_group_add(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, 667 void *arg, const char *name) 668 { 669 return -ENOTSUP; 670 } 671 672 int 673 spdk_fd_group_add_for_events(struct spdk_fd_group *fgrp, int efd, uint32_t events, spdk_fd_fn fn, 674 void *arg, const char *name) 675 { 676 return -ENOTSUP; 677 } 678 679 int 680 spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *arg, 681 const char *name, struct spdk_event_handler_opts *opts) 682 { 683 return -ENOTSUP; 684 } 685 686 void 687 spdk_fd_group_get_default_event_handler_opts(struct spdk_event_handler_opts *opts, 688 size_t opts_size) 689 { 690 assert(false); 691 } 692 693 void 694 spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd) 695 { 696 } 697 698 int 699 spdk_fd_group_event_modify(struct spdk_fd_group *fgrp, 700 int efd, int event_types) 701 { 702 return -ENOTSUP; 703 } 704 705 int 706 spdk_fd_group_create(struct spdk_fd_group **fgrp) 707 { 708 return -ENOTSUP; 709 } 710 711 void 712 spdk_fd_group_destroy(struct spdk_fd_group *fgrp) 713 { 714 } 715 716 int 717 spdk_fd_group_wait(struct spdk_fd_group *fgrp, int timeout) 718 { 719 return -ENOTSUP; 720 } 721 722 int 723 spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child) 724 { 725 return -ENOTSUP; 726 } 727 728 int 729 spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child) 730 { 731 return -ENOTSUP; 732 } 733 734 #endif /* __linux__ */ 735