/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/queue.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
#include <stdbool.h>

#include <eal_trace_internal.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_thread.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>

#include "eal_private.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR 1

static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */

/**
 * union for pipe fds.
 */
union intr_pipefds {
	struct {
		int pipefd[2];
	};
	struct {
		int readfd;
		int writefd;
	};
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
	int uio_intr_count;       /* for uio device */
#ifdef VFIO_PRESENT
	uint64_t vfio_intr_count; /* for vfio device */
#endif
	uint64_t timerfd_num;     /* for timerfd */
	char charbuf[16];         /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next;
	rte_intr_callback_fn cb_fn;  /**< callback address */
	void *cb_arg;                /**< parameter for callback */
	uint8_t pending_delete;      /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};

struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;
	struct rte_intr_handle *intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;   /**< user callbacks */
	uint32_t active;
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static rte_thread_t intr_thread;

/* VFIO interrupts */
#ifdef VFIO_PRESENT

#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))

/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	/* enable INTx */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error enabling INTx interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

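	/*
	 * Note: VFIO auto-masks INTx when the interrupt fires, and the line
	 * may also start out masked; the explicit unmask below (and
	 * vfio_ack_intx() at run time) re-arms it so interrupts keep being
	 * delivered.
	 */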
	/* unmask INTx after enabling */
	memset(irq_set, 0, len);
	len = sizeof(struct vfio_irq_set);
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error unmasking INTx interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	/* mask interrupts before disabling */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error masking INTx interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	/* disable INTx */
	memset(irq_set, 0, len);
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error disabling INTx interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* unmask/ack legacy (INTx) interrupts */
static int
vfio_ack_intx(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set irq_set;
	int vfio_dev_fd;

	/* unmask INTx */
	memset(&irq_set, 0, sizeof(irq_set));
	irq_set.argsz = sizeof(irq_set);
	irq_set.count = 1;
	irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set.start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
		EAL_LOG(ERR, "Error unmasking INTx interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error enabling MSI interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

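/*
 * The disable paths below rely on the VFIO_DEVICE_SET_IRQS convention that
 * VFIO_IRQ_SET_ACTION_TRIGGER with count == 0 tears down all eventfd
 * triggers for the given interrupt index.
 */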
/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		EAL_LOG(ERR, "Error disabling MSI interrupts for fd %d",
			rte_intr_fd_get(intr_handle));

	return ret;
}

/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd, i;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
	irq_set->count = rte_intr_max_intr_get(intr_handle) ?
		(rte_intr_max_intr_get(intr_handle) >
		 RTE_MAX_RXTX_INTR_VEC_ID + 1 ? RTE_MAX_RXTX_INTR_VEC_ID + 1 :
		 rte_intr_max_intr_get(intr_handle)) : 1;

	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	/* INTR vector offset 0 is reserved for non-efds mapping */
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(intr_handle);
	for (i = 0; i < rte_intr_nb_efd_get(intr_handle); i++) {
		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] =
			rte_intr_efds_index_get(intr_handle, i);
	}

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error enabling MSI-X interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		EAL_LOG(ERR, "Error disabling MSI-X interrupts for fd %d",
			rte_intr_fd_get(intr_handle));

	return ret;
}

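/*
 * The "req" notifier (VFIO_PCI_REQ_IRQ_INDEX) is not a device interrupt:
 * the kernel signals it to ask user space to release the device, e.g. on
 * hot-unplug.
 */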
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		EAL_LOG(ERR, "Error enabling req interrupts for fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		EAL_LOG(ERR, "Error disabling req interrupts for fd %d",
			rte_intr_fd_get(intr_handle));

	return ret;
}
#endif
#endif

static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		EAL_LOG(ERR,
			"Error reading interrupts status for fd %d",
			uio_cfg_fd);
		return -1;
	}
	/* disable interrupts */
	command_high |= 0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		EAL_LOG(ERR,
			"Error disabling interrupts for fd %d",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		EAL_LOG(ERR,
			"Error reading interrupts status for fd %d",
			uio_cfg_fd);
		return -1;
	}
	/* enable interrupts */
	command_high &= ~0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		EAL_LOG(ERR,
			"Error enabling interrupts for fd %d",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
	const int value = 0;

	if (rte_intr_fd_get(intr_handle) < 0 ||
	    write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		EAL_LOG(ERR, "Error disabling interrupts for fd %d (%s)",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}

static int
uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
	const int value = 1;

	if (rte_intr_fd_get(intr_handle) < 0 ||
	    write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		EAL_LOG(ERR, "Error enabling interrupts for fd %d (%s)",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}

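/*
 * Attach a user callback to the interrupt source identified by the handle's
 * fd, creating the source on first use. The interrupt thread is then woken
 * through the control pipe so it can rebuild its epoll wait list.
 */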
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/* first do parameter checking */
	if (rte_intr_fd_get(intr_handle) < 0 || cb == NULL) {
		EAL_LOG(ERR, "Registering with invalid input parameter");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = calloc(1, sizeof(*callback));
	if (callback == NULL) {
		EAL_LOG(ERR, "Can not allocate memory");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;
	callback->pending_delete = 0;
	callback->ucb_fn = NULL;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle)) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this fd - add a new source */
	if (src == NULL) {
		src = calloc(1, sizeof(*src));
		if (src == NULL) {
			EAL_LOG(ERR, "Can not allocate memory");
			ret = -ENOMEM;
			free(callback);
			callback = NULL;
		} else {
			src->intr_handle = rte_intr_instance_dup(intr_handle);
			if (src->intr_handle == NULL) {
				EAL_LOG(ERR, "Can not create intr instance");
				ret = -ENOMEM;
				free(callback);
				callback = NULL;
				free(src);
				src = NULL;
			} else {
				TAILQ_INIT(&src->callbacks);
				TAILQ_INSERT_TAIL(&(src->callbacks), callback,
						  next);
				TAILQ_INSERT_TAIL(&intr_sources, src, next);
				wake_thread = 1;
				ret = 0;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check if we need to notify the pipe fd waited on by epoll_wait to
	 * rebuild the wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			ret = -EPIPE;

	rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
	return ret;
}

int
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
				rte_intr_callback_fn cb_fn, void *cb_arg,
				rte_intr_unregister_callback_fn ucb_fn)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		EAL_LOG(ERR, "Unregistering with invalid input parameter");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;
	}

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* only usable if the source is active */
	} else if (src->active == 0) {
		ret = -EAGAIN;

	} else {
		ret = 0;

		/* walk through the callbacks and mark all that match. */
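		/*
		 * A cb_arg of (void *)-1 acts as a wildcard: every callback
		 * registered with cb_fn is matched regardless of its argument.
		 */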
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				cb->pending_delete = 1;
				cb->ucb_fn = ucb_fn;
				ret++;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	return ret;
}

int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		EAL_LOG(ERR, "Unregistering with invalid input parameter");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* interrupt source has some active callbacks right now. */
	} else if (src->active != 0) {
		ret = -EAGAIN;

	/* ok to remove. */
	} else {
		ret = 0;

		/* walk through the callbacks and remove all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

			next = TAILQ_NEXT(cb, next);

			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				free(cb);
				ret++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
		ret = -EPIPE;
	}

	rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
		ret);
	return ret;
}

int
rte_intr_callback_unregister_sync(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret = 0;

	while ((ret = rte_intr_callback_unregister(intr_handle, cb_fn, cb_arg)) == -EAGAIN)
		rte_pause();

	return ret;
}

int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to enable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_enable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_enable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_enable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_enable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		EAL_LOG(ERR, "Unknown handle type of fd %d",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_enable(intr_handle, rc);
	return rc;
}

/**
 * PMDs generally call this function at the end of their IRQ callback.
 * Internally, it unmasks the interrupt if possible.
 *
 * For INTx, unmasking is required as the interrupt is auto-masked prior to
 * invoking the callback.
 *
 * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
 * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
 * this function is a no-op.
 */
int
rte_intr_ack(const struct rte_intr_handle *intr_handle)
{
	int uio_cfg_fd;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 0;

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0)
		return -1;

	switch (rte_intr_type_get(intr_handle)) {
	/* Both acking and enabling are same for UIO */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
#ifdef VFIO_PRESENT
	/* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
		return 0;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_ack_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		return -1;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		EAL_LOG(ERR, "Unknown handle type of fd %d",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to disable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_disable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_disable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_disable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_disable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_disable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_disable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		EAL_LOG(ERR, "Unknown handle type of fd %d",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_disable(intr_handle, rc);
	return rc;
}

static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
	bool call = false;
	int n, bytes_read, rv;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;
	union rte_intr_read_buffer buf;
	struct rte_intr_callback active_cb;

	for (n = 0; n < nfds; n++) {

		/**
		 * if the pipe fd is ready to read, return out to
		 * rebuild the wait list.
		 */
		if (events[n].data.fd == intr_pipe.readfd) {
			int r = read(intr_pipe.readfd, buf.charbuf,
				     sizeof(buf.charbuf));
			RTE_SET_USED(r);
			return -1;
		}
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(src, &intr_sources, next)
			if (rte_intr_fd_get(src->intr_handle) == events[n].data.fd)
				break;
		if (src == NULL) {
			rte_spinlock_unlock(&intr_lock);
			continue;
		}

		/* mark this interrupt source as active and release the lock. */
		src->active = 1;
		rte_spinlock_unlock(&intr_lock);

		/* set the length to be read for the different handle types */
		switch (rte_intr_type_get(src->intr_handle)) {
		case RTE_INTR_HANDLE_UIO:
		case RTE_INTR_HANDLE_UIO_INTX:
			bytes_read = sizeof(buf.uio_intr_count);
			break;
		case RTE_INTR_HANDLE_ALARM:
			bytes_read = sizeof(buf.timerfd_num);
			break;
#ifdef VFIO_PRESENT
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
		case RTE_INTR_HANDLE_VFIO_REQ:
#endif
		case RTE_INTR_HANDLE_VFIO_MSIX:
		case RTE_INTR_HANDLE_VFIO_MSI:
		case RTE_INTR_HANDLE_VFIO_LEGACY:
			bytes_read = sizeof(buf.vfio_intr_count);
			break;
#endif
		case RTE_INTR_HANDLE_VDEV:
		case RTE_INTR_HANDLE_EXT:
			bytes_read = 0;
			call = true;
			break;
		case RTE_INTR_HANDLE_DEV_EVENT:
			bytes_read = 0;
			call = true;
			break;
		default:
			bytes_read = 1;
			break;
		}

		if (bytes_read > 0) {
			/**
			 * read out to clear the ready-to-be-read flag
			 * for epoll_wait.
			 */
			bytes_read = read(events[n].data.fd, &buf, bytes_read);
			if (bytes_read < 0) {
				if (errno == EINTR || errno == EWOULDBLOCK)
					continue;

				EAL_LOG(ERR, "Error reading from file "
					"descriptor %d: %s",
					events[n].data.fd,
					strerror(errno));
				/*
				 * The device is unplugged or buggy, remove
				 * it as an interrupt source and return to
				 * force the wait list to be rebuilt.
				 */
				rte_spinlock_lock(&intr_lock);
				TAILQ_REMOVE(&intr_sources, src, next);
				rte_spinlock_unlock(&intr_lock);

				for (cb = TAILQ_FIRST(&src->callbacks); cb;
							cb = next) {
					next = TAILQ_NEXT(cb, next);
					TAILQ_REMOVE(&src->callbacks, cb, next);
					free(cb);
				}
				rte_intr_instance_free(src->intr_handle);
				free(src);
				return -1;
			} else if (bytes_read == 0)
				EAL_LOG(ERR, "Read nothing from file "
					"descriptor %d", events[n].data.fd);
			else
				call = true;
		}

		/* grab the lock again to call callbacks and update status. */
		rte_spinlock_lock(&intr_lock);

		if (call) {

			/* Finally, call all callbacks. */
			TAILQ_FOREACH(cb, &src->callbacks, next) {

				/* make a copy and unlock. */
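				/*
				 * The copy lets us drop intr_lock while the
				 * callback runs, so the callback itself may
				 * register or unregister callbacks without
				 * deadlocking.
				 */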
				active_cb = *cb;
				rte_spinlock_unlock(&intr_lock);

				/* call the actual callback */
				active_cb.cb_fn(active_cb.cb_arg);

				/* get the lock back. */
				rte_spinlock_lock(&intr_lock);
			}
		}
		/* we are done with that interrupt source, release it. */
		src->active = 0;

		rv = 0;

		/* check if any callbacks are supposed to be removed */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->pending_delete) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				if (cb->ucb_fn)
					cb->ucb_fn(src->intr_handle, cb->cb_arg);
				free(cb);
				rv++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}

		/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
		if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
			rte_spinlock_unlock(&intr_lock);
			return -EPIPE;
		}

		rte_spinlock_unlock(&intr_lock);
	}

	return 0;
}

/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];
	int nfds = 0;

	for (;;) {
		nfds = epoll_wait(pfd, events, totalfds,
			EAL_INTR_EPOLL_WAIT_FOREVER);
		/* epoll_wait failed */
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			EAL_LOG(ERR,
				"epoll_wait returns with fail");
			return;
		}
		/* epoll_wait timed out, which will never happen here */
		else if (nfds == 0)
			continue;
		/* epoll_wait has at least one fd ready to read */
		if (eal_intr_process_interrupts(events, nfds) < 0)
			return;
	}
}

/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never returns;
 */
static __rte_noreturn uint32_t
eal_intr_thread_main(__rte_unused void *arg)
{
	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add pipe fd into wait list, this pipe is used to
		 * rebuild the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		TAILQ_FOREACH(src, &intr_sources, next) {
			struct epoll_event ev;

			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			memset(&ev, 0, sizeof(ev));
			ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			ev.data.fd = rte_intr_fd_get(src->intr_handle);

			/**
			 * add all the device file descriptors
			 * into the wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					rte_intr_fd_get(src->intr_handle), &ev) < 0) {
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					rte_intr_fd_get(src->intr_handle),
					strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);
		/* serve the interrupt */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}

int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which will be waited on by epoll and notified to
	 * rebuild the wait list of epoll.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_thread_create_internal_control(&intr_thread, "intr",
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		EAL_LOG(ERR,
			"Failed to create thread for interrupt handling");
	}

	return ret;
}

static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
	union rte_intr_read_buffer buf;
	int bytes_read = 0;
	int nbytes;

	switch (rte_intr_type_get(intr_handle)) {
	case RTE_INTR_HANDLE_UIO:
	case RTE_INTR_HANDLE_UIO_INTX:
		bytes_read = sizeof(buf.uio_intr_count);
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		bytes_read = sizeof(buf.vfio_intr_count);
		break;
#endif
	case RTE_INTR_HANDLE_VDEV:
		bytes_read = rte_intr_efd_counter_size_get(intr_handle);
		/* For vdev, the number of bytes to read is set by the driver */
		break;
	case RTE_INTR_HANDLE_EXT:
		return;
	default:
		bytes_read = 1;
		EAL_LOG(INFO, "unexpected intr type");
		break;
	}

	/**
	 * read out to clear the ready-to-be-read flag
	 * for epoll_wait.
	 */
	if (bytes_read == 0)
		return;
	do {
		nbytes = read(fd, &buf, bytes_read);
		if (nbytes < 0) {
			if (errno == EINTR || errno == EWOULDBLOCK ||
			    errno == EAGAIN)
				continue;
			EAL_LOG(ERR,
				"Error reading from fd %d: %s",
				fd, strerror(errno));
		} else if (nbytes == 0)
			EAL_LOG(ERR, "Read nothing from fd %d", fd);
		return;
	} while (1);
}

static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
			struct rte_epoll_event *events)
{
	unsigned int i, count = 0;
	struct rte_epoll_event *rev;
	uint32_t valid_status;

	for (i = 0; i < n; i++) {
		rev = evs[i].data.ptr;
		valid_status = RTE_EPOLL_VALID;
		/* ACQUIRE memory ordering here pairs with RELEASE
		 * ordering below acting as a lock to synchronize
		 * the event data updating.
		 */
		if (!rev || !rte_atomic_compare_exchange_strong_explicit(&rev->status,
				&valid_status, RTE_EPOLL_EXEC,
				rte_memory_order_acquire, rte_memory_order_relaxed))
			continue;

		events[count].status = RTE_EPOLL_VALID;
		events[count].fd = rev->fd;
		events[count].epfd = rev->epfd;
		events[count].epdata.event = evs[i].events;
		events[count].epdata.data = rev->epdata.data;
		if (rev->epdata.cb_fun)
			rev->epdata.cb_fun(rev->fd,
					   rev->epdata.cb_arg);

		/* the status update should be observed after
		 * the other fields change.
		 */
		rte_atomic_store_explicit(&rev->status, RTE_EPOLL_VALID,
				rte_memory_order_release);
		count++;
	}
	return count;
}

static inline int
eal_init_tls_epfd(void)
{
	int pfd = epoll_create(255);

	if (pfd < 0) {
		EAL_LOG(ERR,
			"Cannot create epoll instance");
		return -1;
	}
	return pfd;
}

int
rte_intr_tls_epfd(void)
{
	if (RTE_PER_LCORE(_epfd) == -1)
		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

	return RTE_PER_LCORE(_epfd);
}

static int
eal_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout, bool interruptible)
{
	struct epoll_event evs[maxevents];
	int rc;

	if (!events) {
		EAL_LOG(ERR, "rte_epoll_event can't be NULL");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	while (1) {
		rc = epoll_wait(epfd, evs, maxevents, timeout);
		if (likely(rc > 0)) {
			/* epoll_wait has at least one fd ready to read */
			rc = eal_epoll_process_event(evs, rc, events);
			break;
		} else if (rc < 0) {
			if (errno == EINTR) {
				if (interruptible)
					return -1;
				else
					continue;
			}
			/* epoll_wait failed */
			EAL_LOG(ERR, "epoll_wait returns with fail %s",
				strerror(errno));
			rc = -1;
			break;
		} else {
			/* rc == 0, epoll_wait timed out */
			break;
		}
	}

	return rc;
}

int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, false);
}

int
rte_epoll_wait_interruptible(int epfd, struct rte_epoll_event *events,
			     int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, true);
}

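/*
 * Wait until no callback is executing on the event (status RTE_EPOLL_EXEC),
 * then mark it RTE_EPOLL_INVALID so the slot can be reset and safely reused.
 */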
static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
	uint32_t valid_status = RTE_EPOLL_VALID;

	while (!rte_atomic_compare_exchange_strong_explicit(&ev->status, &valid_status,
		    RTE_EPOLL_INVALID, rte_memory_order_acquire, rte_memory_order_relaxed)) {
		while (rte_atomic_load_explicit(&ev->status,
				rte_memory_order_relaxed) != RTE_EPOLL_VALID)
			rte_pause();
		valid_status = RTE_EPOLL_VALID;
	}
	memset(&ev->epdata, 0, sizeof(ev->epdata));
	ev->fd = -1;
	ev->epfd = -1;
}

int
rte_epoll_ctl(int epfd, int op, int fd,
	      struct rte_epoll_event *event)
{
	struct epoll_event ev;

	if (!event) {
		EAL_LOG(ERR, "rte_epoll_event can't be NULL");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	if (op == EPOLL_CTL_ADD) {
		rte_atomic_store_explicit(&event->status, RTE_EPOLL_VALID,
				rte_memory_order_relaxed);
		event->fd = fd;  /* ignore fd in event */
		event->epfd = epfd;
		ev.data.ptr = (void *)event;
	}

	ev.events = event->epdata.event;
	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
		EAL_LOG(ERR, "Error op %d fd %d epoll_ctl, %s",
			op, fd, strerror(errno));
		if (op == EPOLL_CTL_ADD)
			/* rollback status when CTL_ADD fails */
			rte_atomic_store_explicit(&event->status, RTE_EPOLL_INVALID,
					rte_memory_order_relaxed);
		return -1;
	}

	if (op == EPOLL_CTL_DEL && rte_atomic_load_explicit(&event->status,
			rte_memory_order_relaxed) != RTE_EPOLL_INVALID)
		eal_epoll_data_safe_free(event);

	return 0;
}

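/*
 * Map an Rx queue interrupt vector to an application epoll fd. Vector 0
 * (RTE_INTR_VEC_ZERO_OFFSET) is reserved for non-queue interrupts, so queue
 * vectors start at RTE_INTR_VEC_RXTX_OFFSET and are translated to an efd
 * index here.
 */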
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	struct rte_epoll_event *rev;
	struct rte_epoll_data *epdata;
	int epfd_op;
	unsigned int efd_idx;
	int rc = 0;

	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

	if (intr_handle == NULL || rte_intr_nb_efd_get(intr_handle) == 0 ||
			efd_idx >= (unsigned int)rte_intr_nb_efd_get(intr_handle)) {
		EAL_LOG(ERR, "Wrong intr vector number.");
		return -EPERM;
	}

	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (rte_atomic_load_explicit(&rev->status,
				rte_memory_order_relaxed) != RTE_EPOLL_INVALID) {
			EAL_LOG(INFO, "Event already been added.");
			return -EEXIST;
		}

		/* attach to intr vector fd */
		epdata = &rev->epdata;
		epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op,
			rte_intr_efds_index_get(intr_handle, efd_idx), rev);
		if (!rc)
			EAL_LOG(DEBUG,
				"efd %d associated with vec %d added on epfd %d",
				rev->fd, vec, epfd);
		else
			rc = -EPERM;
		break;
	case RTE_INTR_EVENT_DEL:
		epfd_op = EPOLL_CTL_DEL;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (rte_atomic_load_explicit(&rev->status,
				rte_memory_order_relaxed) == RTE_EPOLL_INVALID) {
			EAL_LOG(INFO, "Event does not exist.");
			return -EPERM;
		}

		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
		if (rc)
			rc = -EPERM;
		break;
	default:
		EAL_LOG(ERR, "event op type mismatch");
		rc = -EPERM;
	}

	return rc;
}

void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
	uint32_t i;
	struct rte_epoll_event *rev;

	for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++) {
		rev = rte_intr_elist_index_get(intr_handle, i);
		if (rte_atomic_load_explicit(&rev->status,
				rte_memory_order_relaxed) == RTE_EPOLL_INVALID)
			continue;
		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
			/* force free if the entry is valid */
			eal_epoll_data_safe_free(rev);
		}
	}
}

int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
	uint32_t i;
	int fd;
	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	assert(nb_efd != 0);

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) {
		for (i = 0; i < n; i++) {
			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
			if (fd < 0) {
				EAL_LOG(ERR,
					"can't setup eventfd, error %i (%s)",
					errno, strerror(errno));
				return -errno;
			}

			if (rte_intr_efds_index_set(intr_handle, i, fd))
				return -rte_errno;
		}

		if (rte_intr_nb_efd_set(intr_handle, n))
			return -rte_errno;

		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR + n))
			return -rte_errno;
	} else if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		/* only check; initialization is done in the vdev driver. */
		if ((uint64_t)rte_intr_efd_counter_size_get(intr_handle) >
				sizeof(union rte_intr_read_buffer)) {
			EAL_LOG(ERR, "the efd_counter_size is oversized");
			return -EINVAL;
		}
	} else {
		if (rte_intr_efds_index_set(intr_handle, 0, rte_intr_fd_get(intr_handle)))
			return -rte_errno;
		if (rte_intr_nb_efd_set(intr_handle, RTE_MIN(nb_efd, 1U)))
			return -rte_errno;
		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR))
			return -rte_errno;
	}

	return 0;
}

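/*
 * Undo rte_intr_efd_enable(): detach any epoll events and, when the handle
 * owns its event fds (max_intr > nb_efd, typically the MSI-X case where the
 * eventfds were created above), close the per-queue eventfds.
 */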
void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
	uint32_t i;

	rte_intr_free_epoll_fd(intr_handle);
	if (rte_intr_max_intr_get(intr_handle) > rte_intr_nb_efd_get(intr_handle)) {
		for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++)
			close(rte_intr_efds_index_get(intr_handle, i));
	}
	rte_intr_nb_efd_set(intr_handle, 0);
	rte_intr_max_intr_set(intr_handle, 0);
}

int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
	return !(!rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
	if (!rte_intr_dp_is_en(intr_handle))
		return 1;
	else
		return !!(rte_intr_max_intr_get(intr_handle) -
			rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX)
		return 1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 1;

	return 0;
}

int rte_thread_is_intr(void)
{
	return rte_thread_equal(intr_thread, rte_thread_self());
}