/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>
#include <rte_eal_trace.h>

#include "eal_private.h"
#include "eal_vfio.h"
#include "eal_thread.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR               1

static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */

/**
 * union for pipe fds.
 */
union intr_pipefds{
	struct {
		int pipefd[2];
	};
	struct {
		int readfd;
		int writefd;
	};
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
	int uio_intr_count;              /* for uio device */
#ifdef VFIO_PRESENT
	uint64_t vfio_intr_count;        /* for vfio device */
#endif
	uint64_t timerfd_num;            /* for timerfd */
	char charbuf[16];                /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next;
	rte_intr_callback_fn cb_fn;  /**< callback address */
	void *cb_arg;                /**< parameter for callback */
	uint8_t pending_delete;      /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};

struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;
	struct rte_intr_handle intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;  /**< user callbacks */
	uint32_t active;
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

/* VFIO interrupts */
#ifdef VFIO_PRESENT

#define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))

/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	/* enable INTx */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	/* unmask INTx after enabling */
	memset(irq_set, 0, len);
	len = sizeof(struct vfio_irq_set);
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}
	return 0;
}

/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	/* mask interrupts before disabling */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	/* disable INTx */
	memset(irq_set, 0, len);
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL,
			"Error disabling INTx interrupts for fd %d\n", intr_handle->fd);
		return -1;
	}
	return 0;
}

/* unmask/ack legacy (INTx) interrupts */
static int
vfio_ack_intx(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set irq_set;

	/* unmask INTx */
	memset(&irq_set, 0, sizeof(irq_set));
	irq_set.argsz = sizeof(irq_set);
	irq_set.count = 1;
	irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set.start = 0;

	if (ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}
	return 0;
}

/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}
	return 0;
}

/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL,
			"Error disabling MSI interrupts for fd %d\n", intr_handle->fd);

	return ret;
}

/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
	irq_set->count = intr_handle->max_intr ?
		(intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
		RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	/* interrupt vector offset 0 is reserved for the non-efd (misc) mapping */
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd;
	memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds,
		sizeof(*intr_handle->efds) * intr_handle->nb_efd);

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}

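/*
 * Illustrative note (a sketch of the mapping set up above, not additional
 * functionality): for MSI-X the eventfd array handed to VFIO_DEVICE_SET_IRQS
 * is laid out as
 *
 *	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET]     = intr_handle->fd       (misc/LSC)
 *	fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = intr_handle->efds[i]  (queue i)
 *
 * so vector 0 stays reserved for the non-queue interrupt, and the per-queue
 * eventfds created by rte_intr_efd_enable() follow it.
 */
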
/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL,
			"Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd);

	return ret;
}

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = intr_handle->fd;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}

/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
			intr_handle->fd);

	return ret;
}
#endif
#endif

static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;

	/* use UIO config file descriptor for uio_pci_generic */
	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}
	/* disable interrupts: set the INTx Disable bit in the high byte of
	 * the PCI command register (config space offset 5, bit 2).
	 */
	command_high |= 0x4;
	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;

	/* use UIO config file descriptor for uio_pci_generic */
	if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}
	/* enable interrupts: clear the INTx Disable bit again */
	command_high &= ~0x4;
	if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d\n",
			intr_handle->uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
	const int value = 0;

	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d (%s)\n",
			intr_handle->fd, strerror(errno));
		return -1;
	}
	return 0;
}

static int
uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
	const int value = 1;

	if (write(intr_handle->fd, &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d (%s)\n",
			intr_handle->fd, strerror(errno));
		return -1;
	}
	return 0;
}

int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/* first do parameter checking */
	if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
		RTE_LOG(ERR, EAL,
			"Registering with invalid input parameter\n");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = calloc(1, sizeof(*callback));
	if (callback == NULL) {
		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;
	callback->pending_delete = 0;
	callback->ucb_fn = NULL;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (src->intr_handle.fd == intr_handle->fd) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this - add new source */
	if (src == NULL) {
		src = calloc(1, sizeof(*src));
		if (src == NULL) {
			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
			free(callback);
			ret = -ENOMEM;
		} else {
			src->intr_handle = *intr_handle;
			TAILQ_INIT(&src->callbacks);
			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			TAILQ_INSERT_TAIL(&intr_sources, src, next);
			wake_thread = 1;
			ret = 0;
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check whether we need to notify the pipe fd waited on by
	 * epoll_wait so that the interrupt thread rebuilds its wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			ret = -EPIPE;

	rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
	return ret;
}

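/*
 * Usage sketch (illustrative only; "my_dev" and "my_intr_handler" are
 * hypothetical names, not part of EAL): a driver typically registers its
 * callback once and then enables the interrupt source:
 *
 *	static void my_intr_handler(void *cb_arg)
 *	{
 *		struct my_dev *dev = cb_arg;
 *		... service the device ...
 *	}
 *
 *	ret = rte_intr_callback_register(&dev->intr_handle,
 *					 my_intr_handler, dev);
 *	if (ret == 0)
 *		ret = rte_intr_enable(&dev->intr_handle);
 */
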
int
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
				rte_intr_callback_fn cb_fn, void *cb_arg,
				rte_intr_unregister_callback_fn ucb_fn)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (intr_handle == NULL || intr_handle->fd < 0) {
		RTE_LOG(ERR, EAL,
			"Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (src->intr_handle.fd == intr_handle->fd)
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* only usable if the source is active */
	} else if (src->active == 0) {
		ret = -EAGAIN;

	} else {
		ret = 0;

		/* walk through the callbacks and mark all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				cb->pending_delete = 1;
				cb->ucb_fn = ucb_fn;
				ret++;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	return ret;
}

int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (intr_handle == NULL || intr_handle->fd < 0) {
		RTE_LOG(ERR, EAL,
			"Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (src->intr_handle.fd == intr_handle->fd)
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* interrupt source has some active callbacks right now. */
	} else if (src->active != 0) {
		ret = -EAGAIN;

	/* ok to remove. */
	} else {
		ret = 0;

		/* walk through the callbacks and remove all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

			next = TAILQ_NEXT(cb, next);

			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				free(cb);
				ret++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			free(src);
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
		ret = -EPIPE;
	}

	rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
		ret);
	return ret;
}

int
rte_intr_callback_unregister_sync(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret = 0;

	while ((ret = rte_intr_callback_unregister(intr_handle, cb_fn, cb_arg)) == -EAGAIN)
		rte_pause();

	return ret;
}

int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0;

	if (intr_handle == NULL)
		return -1;

	if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	if (intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (intr_handle->type) {
	/* write to the uio fd to enable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_enable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_enable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_enable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_enable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL,
			"Unknown handle type of fd %d\n",
			intr_handle->fd);
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_enable(intr_handle, rc);
	return rc;
}

/**
 * PMDs generally call this function at the end of their IRQ callback.
 * Internally, it unmasks the interrupt if possible.
 *
 * For INTx, unmasking is required as the interrupt is auto-masked prior to
 * invoking the callback.
 *
 * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
 * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
 * this function is a no-op.
 */
int
rte_intr_ack(const struct rte_intr_handle *intr_handle)
{
	if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
		return 0;

	if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
		return -1;

	switch (intr_handle->type) {
	/* Both acking and enabling are the same for UIO */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
#ifdef VFIO_PRESENT
	/* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
		return 0;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_ack_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		return -1;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			intr_handle->fd);
		return -1;
	}

	return 0;
}

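/*
 * Usage sketch (illustrative only; the device structure and handler name are
 * hypothetical): a callback registered via rte_intr_callback_register()
 * normally services the event and then re-arms the source before returning,
 * which for INTx performs the unmask described above:
 *
 *	static void my_lsc_handler(void *cb_arg)
 *	{
 *		struct my_dev *dev = cb_arg;
 *
 *		... read and clear the device's interrupt cause ...
 *
 *		rte_intr_ack(&dev->intr_handle);
 *	}
 */
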
int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0;

	if (intr_handle == NULL)
		return -1;

	if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	if (intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (intr_handle->type) {
	/* write to the uio fd to disable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_disable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_disable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_disable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_disable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_disable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_disable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL,
			"Unknown handle type of fd %d\n",
			intr_handle->fd);
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_disable(intr_handle, rc);
	return rc;
}

static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
	bool call = false;
	int n, bytes_read, rv;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;
	union rte_intr_read_buffer buf;
	struct rte_intr_callback active_cb;

	for (n = 0; n < nfds; n++) {

		/**
		 * if the pipe fd is ready to read, return out to
		 * rebuild the wait list.
		 */
		if (events[n].data.fd == intr_pipe.readfd){
			int r = read(intr_pipe.readfd, buf.charbuf,
					sizeof(buf.charbuf));
			RTE_SET_USED(r);
			return -1;
		}
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(src, &intr_sources, next)
			if (src->intr_handle.fd ==
					events[n].data.fd)
				break;
		if (src == NULL){
			rte_spinlock_unlock(&intr_lock);
			continue;
		}

		/* mark this interrupt source as active and release the lock. */
		src->active = 1;
		rte_spinlock_unlock(&intr_lock);

		/* set the length to be read for each handle type */
		switch (src->intr_handle.type) {
		case RTE_INTR_HANDLE_UIO:
		case RTE_INTR_HANDLE_UIO_INTX:
			bytes_read = sizeof(buf.uio_intr_count);
			break;
		case RTE_INTR_HANDLE_ALARM:
			bytes_read = sizeof(buf.timerfd_num);
			break;
#ifdef VFIO_PRESENT
		case RTE_INTR_HANDLE_VFIO_MSIX:
		case RTE_INTR_HANDLE_VFIO_MSI:
		case RTE_INTR_HANDLE_VFIO_LEGACY:
			bytes_read = sizeof(buf.vfio_intr_count);
			break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
		case RTE_INTR_HANDLE_VFIO_REQ:
			bytes_read = 0;
			call = true;
			break;
#endif
#endif
		case RTE_INTR_HANDLE_VDEV:
		case RTE_INTR_HANDLE_EXT:
			bytes_read = 0;
			call = true;
			break;
		case RTE_INTR_HANDLE_DEV_EVENT:
			bytes_read = 0;
			call = true;
			break;
		default:
			bytes_read = 1;
			break;
		}

		if (bytes_read > 0) {
			/**
			 * read out to clear the ready-to-be-read flag
			 * for epoll_wait.
			 */
			bytes_read = read(events[n].data.fd, &buf, bytes_read);
			if (bytes_read < 0) {
				if (errno == EINTR || errno == EWOULDBLOCK)
					continue;

				RTE_LOG(ERR, EAL, "Error reading from file "
					"descriptor %d: %s\n",
					events[n].data.fd,
					strerror(errno));
				/*
				 * The device is unplugged or buggy, remove
				 * it as an interrupt source and return to
				 * force the wait list to be rebuilt.
				 */
				rte_spinlock_lock(&intr_lock);
				TAILQ_REMOVE(&intr_sources, src, next);
				rte_spinlock_unlock(&intr_lock);

				for (cb = TAILQ_FIRST(&src->callbacks); cb;
							cb = next) {
					next = TAILQ_NEXT(cb, next);
					TAILQ_REMOVE(&src->callbacks, cb, next);
					free(cb);
				}
				free(src);
				return -1;
			} else if (bytes_read == 0)
				RTE_LOG(ERR, EAL, "Read nothing from file "
					"descriptor %d\n", events[n].data.fd);
			else
				call = true;
		}

		/* grab the lock again to call callbacks and update status. */
		rte_spinlock_lock(&intr_lock);

		if (call) {

			/* Finally, call all callbacks. */
			TAILQ_FOREACH(cb, &src->callbacks, next) {

				/* make a copy and unlock. */
				active_cb = *cb;
				rte_spinlock_unlock(&intr_lock);

				/* call the actual callback */
				active_cb.cb_fn(active_cb.cb_arg);

				/* get the lock back. */
				rte_spinlock_lock(&intr_lock);
			}
		}
		/* we are done with that interrupt source, release it. */
		src->active = 0;

		rv = 0;

		/* check if any callbacks are supposed to be removed */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->pending_delete) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				if (cb->ucb_fn)
					cb->ucb_fn(&src->intr_handle, cb->cb_arg);
				free(cb);
				rv++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			free(src);
		}

		/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
		if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
			rte_spinlock_unlock(&intr_lock);
			return -EPIPE;
		}

		rte_spinlock_unlock(&intr_lock);
	}

	return 0;
}

/**
 * Handle all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];
	int nfds = 0;

	for (;;) {
		nfds = epoll_wait(pfd, events, totalfds,
			EAL_INTR_EPOLL_WAIT_FOREVER);
		/* epoll_wait fail */
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			RTE_LOG(ERR, EAL,
				"epoll_wait returns with fail\n");
			return;
		}
		/* epoll_wait timeout, will never happen here */
		else if (nfds == 0)
			continue;
		/* epoll_wait has at least one fd ready to read */
		if (eal_intr_process_interrupts(events, nfds) < 0)
			return;
	}
}

/**
 * Builds/rebuilds the epoll file descriptor with all the file descriptors
 * being waited on, then handles the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never returns
 */
static __rte_noreturn void *
eal_intr_thread_main(__rte_unused void *arg)
{
	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add pipe fd into wait list, this pipe is used to
		 * rebuild the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		TAILQ_FOREACH(src, &intr_sources, next) {
			struct epoll_event ev;

			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			memset(&ev, 0, sizeof(ev));
			ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			ev.data.fd = src->intr_handle.fd;

			/**
			 * add all the device file descriptors
			 * into the wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					src->intr_handle.fd, &ev) < 0){
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					src->intr_handle.fd, strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);
		/* serve the interrupt */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}

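/*
 * Note on the rebuild protocol (an explanatory aid derived from the code
 * above, not additional functionality): whenever the set of interrupt
 * sources changes, rte_intr_callback_register()/unregister() write a single
 * byte to intr_pipe.writefd. The interrupt thread always keeps
 * intr_pipe.readfd in its epoll set, so eal_intr_process_interrupts() sees
 * the pipe become readable, drains it and returns -1, which makes
 * eal_intr_thread_main() close the stale epoll fd and build a fresh one from
 * the current source list.
 */
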
int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe that epoll_wait waits on; writing to it tells the
	 * interrupt thread to rebuild its wait list.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");
	}

	return ret;
}

static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
	union rte_intr_read_buffer buf;
	int bytes_read = 0;
	int nbytes;

	switch (intr_handle->type) {
	case RTE_INTR_HANDLE_UIO:
	case RTE_INTR_HANDLE_UIO_INTX:
		bytes_read = sizeof(buf.uio_intr_count);
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		bytes_read = sizeof(buf.vfio_intr_count);
		break;
#endif
	case RTE_INTR_HANDLE_VDEV:
		bytes_read = intr_handle->efd_counter_size;
		/* For vdev, number of bytes to read is set by driver */
		break;
	case RTE_INTR_HANDLE_EXT:
		return;
	default:
		bytes_read = 1;
		RTE_LOG(INFO, EAL, "unexpected intr type\n");
		break;
	}

	/**
	 * read out to clear the ready-to-be-read flag
	 * for epoll_wait.
	 */
	if (bytes_read == 0)
		return;
	do {
		nbytes = read(fd, &buf, bytes_read);
		if (nbytes < 0) {
			if (errno == EINTR || errno == EWOULDBLOCK ||
			    errno == EAGAIN)
				continue;
			RTE_LOG(ERR, EAL,
				"Error reading from fd %d: %s\n",
				fd, strerror(errno));
		} else if (nbytes == 0)
			RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
		return;
	} while (1);
}

static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
			struct rte_epoll_event *events)
{
	unsigned int i, count = 0;
	struct rte_epoll_event *rev;
	uint32_t valid_status;

	for (i = 0; i < n; i++) {
		rev = evs[i].data.ptr;
		valid_status = RTE_EPOLL_VALID;
		/* ACQUIRE memory ordering here pairs with RELEASE
		 * ordering below acting as a lock to synchronize
		 * the event data updating.
		 */
		if (!rev || !__atomic_compare_exchange_n(&rev->status,
				&valid_status, RTE_EPOLL_EXEC, 0,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			continue;

		events[count].status = RTE_EPOLL_VALID;
		events[count].fd = rev->fd;
		events[count].epfd = rev->epfd;
		events[count].epdata.event = evs[i].events;
		events[count].epdata.data = rev->epdata.data;
		if (rev->epdata.cb_fun)
			rev->epdata.cb_fun(rev->fd,
					   rev->epdata.cb_arg);

		/* the status update should be observed after
		 * the other fields change.
		 */
		__atomic_store_n(&rev->status, RTE_EPOLL_VALID,
				__ATOMIC_RELEASE);
		count++;
	}
	return count;
}

static inline int
eal_init_tls_epfd(void)
{
	int pfd = epoll_create(255);

	if (pfd < 0) {
		RTE_LOG(ERR, EAL,
			"Cannot create epoll instance\n");
		return -1;
	}
	return pfd;
}

int
rte_intr_tls_epfd(void)
{
	if (RTE_PER_LCORE(_epfd) == -1)
		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

	return RTE_PER_LCORE(_epfd);
}

static int
eal_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout, bool interruptible)
{
	struct epoll_event evs[maxevents];
	int rc;

	if (!events) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	while (1) {
		rc = epoll_wait(epfd, evs, maxevents, timeout);
		if (likely(rc > 0)) {
			/* epoll_wait has at least one fd ready to read */
			rc = eal_epoll_process_event(evs, rc, events);
			break;
		} else if (rc < 0) {
			if (errno == EINTR) {
				if (interruptible)
					return -1;
				else
					continue;
			}
			/* epoll_wait fail */
			RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
				strerror(errno));
			rc = -1;
			break;
		} else {
			/* rc == 0, epoll_wait timed out */
			break;
		}
	}

	return rc;
}

int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, false);
}

int
rte_epoll_wait_interruptible(int epfd, struct rte_epoll_event *events,
			     int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, true);
}

static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
	uint32_t valid_status = RTE_EPOLL_VALID;

	while (!__atomic_compare_exchange_n(&ev->status, &valid_status,
		    RTE_EPOLL_INVALID, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		while (__atomic_load_n(&ev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_VALID)
			rte_pause();
		valid_status = RTE_EPOLL_VALID;
	}
	memset(&ev->epdata, 0, sizeof(ev->epdata));
	ev->fd = -1;
	ev->epfd = -1;
}

int
rte_epoll_ctl(int epfd, int op, int fd,
	      struct rte_epoll_event *event)
{
	struct epoll_event ev;

	if (!event) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	if (op == EPOLL_CTL_ADD) {
		__atomic_store_n(&event->status, RTE_EPOLL_VALID,
				__ATOMIC_RELAXED);
		event->fd = fd;  /* ignore fd in event */
		event->epfd = epfd;
		ev.data.ptr = (void *)event;
	}

	ev.events = event->epdata.event;
	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
		RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
			op, fd, strerror(errno));
		if (op == EPOLL_CTL_ADD)
			/* rollback status when CTL_ADD fails */
			__atomic_store_n(&event->status, RTE_EPOLL_INVALID,
					__ATOMIC_RELAXED);
		return -1;
	}

	if (op == EPOLL_CTL_DEL && __atomic_load_n(&event->status,
			__ATOMIC_RELAXED) != RTE_EPOLL_INVALID)
		eal_epoll_data_safe_free(event);

	return 0;
}

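/*
 * Usage sketch (illustrative only; the fd, event flags and timeout are
 * assumptions about a typical caller, not code from this file): an
 * application can watch an arbitrary fd through the per-thread epoll
 * instance:
 *
 *	static struct rte_epoll_event ev;
 *	struct rte_epoll_event out[1];
 *
 *	ev.epdata.event = EPOLLIN | EPOLLET;
 *	ev.epdata.data = NULL;
 *	if (rte_epoll_ctl(RTE_EPOLL_PER_THREAD, EPOLL_CTL_ADD, fd, &ev) < 0)
 *		return -1;
 *	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, out, 1, timeout_ms);
 */
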
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	struct rte_epoll_event *rev;
	struct rte_epoll_data *epdata;
	int epfd_op;
	unsigned int efd_idx;
	int rc = 0;

	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

	if (!intr_handle || intr_handle->nb_efd == 0 ||
	    efd_idx >= intr_handle->nb_efd) {
		RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
		return -EPERM;
	}

	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = &intr_handle->elist[efd_idx];
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event already been added.\n");
			return -EEXIST;
		}

		/* attach to intr vector fd */
		epdata = &rev->epdata;
		epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op,
				intr_handle->efds[efd_idx], rev);
		if (!rc)
			RTE_LOG(DEBUG, EAL,
				"efd %d associated with vec %d added on epfd %d"
				"\n", rev->fd, vec, epfd);
		else
			rc = -EPERM;
		break;
	case RTE_INTR_EVENT_DEL:
		epfd_op = EPOLL_CTL_DEL;
		rev = &intr_handle->elist[efd_idx];
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event does not exist.\n");
			return -EPERM;
		}

		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
		if (rc)
			rc = -EPERM;
		break;
	default:
		RTE_LOG(ERR, EAL, "event op type mismatch\n");
		rc = -EPERM;
	}

	return rc;
}

void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
	uint32_t i;
	struct rte_epoll_event *rev;

	for (i = 0; i < intr_handle->nb_efd; i++) {
		rev = &intr_handle->elist[i];
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID)
			continue;
		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
			/* force free if the entry is still valid */
			eal_epoll_data_safe_free(rev);
		}
	}
}

int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
	uint32_t i;
	int fd;
	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	assert(nb_efd != 0);

	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) {
		for (i = 0; i < n; i++) {
			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
			if (fd < 0) {
				RTE_LOG(ERR, EAL,
					"can't setup eventfd, error %i (%s)\n",
					errno, strerror(errno));
				return -errno;
			}
			intr_handle->efds[i] = fd;
		}
		intr_handle->nb_efd = n;
		intr_handle->max_intr = NB_OTHER_INTR + n;
	} else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
		/* only check, initialization would be done in vdev driver.*/
		if (intr_handle->efd_counter_size >
		    sizeof(union rte_intr_read_buffer)) {
			RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
			return -EINVAL;
		}
	} else {
		intr_handle->efds[0] = intr_handle->fd;
		intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
		intr_handle->max_intr = NB_OTHER_INTR;
	}

	return 0;
}

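/*
 * Usage sketch (illustrative only; the queue count, epoll fd choice and
 * timeout are assumptions about a typical datapath setup): one eventfd per
 * Rx queue is created first, then each queue vector is mapped to an epoll
 * instance before waiting on it:
 *
 *	if (rte_intr_efd_enable(intr_handle, nb_rx_queues) < 0)
 *		return -1;
 *
 *	for (q = 0; q < nb_rx_queues; q++)
 *		rte_intr_rx_ctl(intr_handle, RTE_EPOLL_PER_THREAD,
 *				RTE_INTR_EVENT_ADD,
 *				q + RTE_INTR_VEC_RXTX_OFFSET, NULL);
 *
 *	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, events, nb_rx_queues, -1);
 */
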
void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
	uint32_t i;

	rte_intr_free_epoll_fd(intr_handle);
	if (intr_handle->max_intr > intr_handle->nb_efd) {
		for (i = 0; i < intr_handle->nb_efd; i++)
			close(intr_handle->efds[i]);
	}
	intr_handle->nb_efd = 0;
	intr_handle->max_intr = 0;
}

int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
	return !(!intr_handle->nb_efd);
}

int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
	if (!rte_intr_dp_is_en(intr_handle))
		return 1;
	else
		return !!(intr_handle->max_intr - intr_handle->nb_efd);
}

int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
	if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
		return 1;

	if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
		return 1;

	return 0;
}

int rte_thread_is_intr(void)
{
	return pthread_equal(intr_thread, pthread_self());
}