/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>
#include <rte_eal_trace.h>

#include "eal_private.h"
#include "eal_vfio.h"
#include "eal_thread.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR 1

static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */

/**
 * union for pipe fds.
 */
union intr_pipefds {
	struct {
		int pipefd[2];
	};
	struct {
		int readfd;
		int writefd;
	};
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
	int uio_intr_count;       /* for uio device */
#ifdef VFIO_PRESENT
	uint64_t vfio_intr_count; /* for vfio device */
#endif
	uint64_t timerfd_num;     /* for timerfd */
	char charbuf[16];         /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next;
	rte_intr_callback_fn cb_fn; /**< callback address */
	void *cb_arg;               /**< parameter for callback */
	uint8_t pending_delete;     /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};

struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;
	struct rte_intr_handle *intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;   /**< user callbacks */
	uint32_t active;
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

/* VFIO interrupts */
#ifdef VFIO_PRESENT

#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))

/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	/* enable INTx */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	/* unmask INTx after enabling */
	memset(irq_set, 0, len);
	len = sizeof(struct vfio_irq_set);
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	/* mask interrupts before disabling */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	/* disable INTx */
	memset(irq_set, 0, len);
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error disabling INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* unmask/ack legacy (INTx) interrupts */
static int
vfio_ack_intx(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set irq_set;
	int vfio_dev_fd;

	/* unmask INTx */
	memset(&irq_set, 0, sizeof(irq_set));
	irq_set.argsz = sizeof(irq_set);
	irq_set.count = 1;
	irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set.start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling MSI interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}

/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd, i;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
	irq_set->count = rte_intr_max_intr_get(intr_handle) ?
		(rte_intr_max_intr_get(intr_handle) >
		 RTE_MAX_RXTX_INTR_VEC_ID + 1 ? RTE_MAX_RXTX_INTR_VEC_ID + 1 :
		 rte_intr_max_intr_get(intr_handle)) : 1;

	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	/* INTR vector offset 0 is reserved for non-efds mapping */
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(intr_handle);
	for (i = 0; i < rte_intr_nb_efd_get(intr_handle); i++) {
		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] =
			rte_intr_efds_index_get(intr_handle, i);
	}

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}

#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}
#endif
#endif

static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			uio_cfg_fd);
		return -1;
	}
	/* disable interrupts */
	command_high |= 0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d\n",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			uio_cfg_fd);
		return -1;
	}
	/* enable interrupts */
	command_high &= ~0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d\n",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
	const int value = 0;

	if (write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL, "Error disabling interrupts for fd %d (%s)\n",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}

static int
uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
	const int value = 1;

	if (write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL, "Error enabling interrupts for fd %d (%s)\n",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}

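/*
 * Illustrative usage sketch (not part of EAL; all "my_*" names are
 * hypothetical): a driver typically pairs the registration API below with
 * rte_intr_enable() and re-arms the interrupt from its handler with
 * rte_intr_ack(), roughly like this:
 *
 *	static void
 *	my_dev_lsc_handler(void *cb_arg)
 *	{
 *		struct my_dev *dev = cb_arg;
 *
 *		my_dev_update_link_status(dev);
 *		rte_intr_ack(dev->intr_handle);
 *	}
 *
 *	rte_intr_callback_register(dev->intr_handle, my_dev_lsc_handler, dev);
 *	rte_intr_enable(dev->intr_handle);
 *
 * Callbacks registered this way run in the eal-intr-thread context created
 * by rte_eal_intr_init(), not on a data-path lcore.
 */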
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/* first do parameter checking */
	if (rte_intr_fd_get(intr_handle) < 0 || cb == NULL) {
		RTE_LOG(ERR, EAL, "Registering with invalid input parameter\n");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = calloc(1, sizeof(*callback));
	if (callback == NULL) {
		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;
	callback->pending_delete = 0;
	callback->ucb_fn = NULL;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle)) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this - add new source */
	if (src == NULL) {
		src = calloc(1, sizeof(*src));
		if (src == NULL) {
			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
			ret = -ENOMEM;
			free(callback);
			callback = NULL;
		} else {
			src->intr_handle = rte_intr_instance_dup(intr_handle);
			if (src->intr_handle == NULL) {
				RTE_LOG(ERR, EAL, "Can not create intr instance\n");
				ret = -ENOMEM;
				free(callback);
				callback = NULL;
				free(src);
				src = NULL;
			} else {
				TAILQ_INIT(&src->callbacks);
				TAILQ_INSERT_TAIL(&(src->callbacks), callback,
						next);
				TAILQ_INSERT_TAIL(&intr_sources, src, next);
				wake_thread = 1;
				ret = 0;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check if we need to notify the pipe fd waited on by epoll_wait to
	 * rebuild the wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			ret = -EPIPE;

	rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
	return ret;
}

int
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg,
			rte_intr_unregister_callback_fn ucb_fn)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		RTE_LOG(ERR, EAL, "Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;
	}

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* only usable if the source is active */
	} else if (src->active == 0) {
		ret = -EAGAIN;

	} else {
		ret = 0;

		/* walk through the callbacks and mark all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				cb->pending_delete = 1;
				cb->ucb_fn = ucb_fn;
				ret++;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	return ret;
}

int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		RTE_LOG(ERR, EAL, "Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* interrupt source has some active callbacks right now. */
	} else if (src->active != 0) {
		ret = -EAGAIN;

	/* ok to remove. */
	} else {
		ret = 0;

		/* walk through the callbacks and remove all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

			next = TAILQ_NEXT(cb, next);

			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				free(cb);
				ret++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
		ret = -EPIPE;
	}

	rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
		ret);
	return ret;
}

int
rte_intr_callback_unregister_sync(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret = 0;

	while ((ret = rte_intr_callback_unregister(intr_handle, cb_fn, cb_arg)) == -EAGAIN)
		rte_pause();

	return ret;
}

int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to enable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_enable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_enable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_enable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_enable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_enable(intr_handle, rc);
	return rc;
}

/**
 * A PMD generally calls this function at the end of its IRQ callback.
 * Internally, it unmasks the interrupt if possible.
 *
 * For INTx, unmasking is required as the interrupt is auto-masked prior to
 * invoking the callback.
 *
 * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
 * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
 * this function is a no-op.
 */
int
rte_intr_ack(const struct rte_intr_handle *intr_handle)
{
	int uio_cfg_fd;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 0;

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0)
		return -1;

	switch (rte_intr_type_get(intr_handle)) {
	/* Both acking and enabling are same for UIO */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
#ifdef VFIO_PRESENT
	/* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
		return 0;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_ack_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		return -1;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to disable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_disable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_disable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_disable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_disable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_disable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_disable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_disable(intr_handle, rc);
	return rc;
}

static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
	bool call = false;
	int n, bytes_read, rv;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;
	union rte_intr_read_buffer buf;
	struct rte_intr_callback active_cb;

	for (n = 0; n < nfds; n++) {

		/**
		 * if the pipe fd is ready to read, return out to
		 * rebuild the wait list.
		 */
		if (events[n].data.fd == intr_pipe.readfd) {
			int r = read(intr_pipe.readfd, buf.charbuf,
					sizeof(buf.charbuf));
			RTE_SET_USED(r);
			return -1;
		}
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(src, &intr_sources, next)
			if (rte_intr_fd_get(src->intr_handle) == events[n].data.fd)
				break;
		if (src == NULL) {
			rte_spinlock_unlock(&intr_lock);
			continue;
		}

		/* mark this interrupt source as active and release the lock. */
		src->active = 1;
		rte_spinlock_unlock(&intr_lock);

		/* set the length to be read for different handle types */
		switch (rte_intr_type_get(src->intr_handle)) {
		case RTE_INTR_HANDLE_UIO:
		case RTE_INTR_HANDLE_UIO_INTX:
			bytes_read = sizeof(buf.uio_intr_count);
			break;
		case RTE_INTR_HANDLE_ALARM:
			bytes_read = sizeof(buf.timerfd_num);
			break;
#ifdef VFIO_PRESENT
		case RTE_INTR_HANDLE_VFIO_MSIX:
		case RTE_INTR_HANDLE_VFIO_MSI:
		case RTE_INTR_HANDLE_VFIO_LEGACY:
			bytes_read = sizeof(buf.vfio_intr_count);
			break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
		case RTE_INTR_HANDLE_VFIO_REQ:
			bytes_read = 0;
			call = true;
			break;
#endif
#endif
		case RTE_INTR_HANDLE_VDEV:
		case RTE_INTR_HANDLE_EXT:
			bytes_read = 0;
			call = true;
			break;
		case RTE_INTR_HANDLE_DEV_EVENT:
			bytes_read = 0;
			call = true;
			break;
		default:
			bytes_read = 1;
			break;
		}

		if (bytes_read > 0) {
			/**
			 * read out to clear the ready-to-be-read flag
			 * for epoll_wait.
			 */
			bytes_read = read(events[n].data.fd, &buf, bytes_read);
			if (bytes_read < 0) {
				if (errno == EINTR || errno == EWOULDBLOCK)
					continue;

				RTE_LOG(ERR, EAL, "Error reading from file "
					"descriptor %d: %s\n",
					events[n].data.fd,
					strerror(errno));
				/*
				 * The device is unplugged or buggy, remove
				 * it as an interrupt source and return to
				 * force the wait list to be rebuilt.
				 */
				rte_spinlock_lock(&intr_lock);
				TAILQ_REMOVE(&intr_sources, src, next);
				rte_spinlock_unlock(&intr_lock);

				for (cb = TAILQ_FIRST(&src->callbacks); cb;
							cb = next) {
					next = TAILQ_NEXT(cb, next);
					TAILQ_REMOVE(&src->callbacks, cb, next);
					free(cb);
				}
				rte_intr_instance_free(src->intr_handle);
				free(src);
				return -1;
			} else if (bytes_read == 0)
				RTE_LOG(ERR, EAL, "Read nothing from file "
					"descriptor %d\n", events[n].data.fd);
			else
				call = true;
		}

		/* grab a lock, again to call callbacks and update status. */
		rte_spinlock_lock(&intr_lock);

		if (call) {

			/* Finally, call all callbacks. */
			TAILQ_FOREACH(cb, &src->callbacks, next) {

				/* make a copy and unlock. */
				active_cb = *cb;
				rte_spinlock_unlock(&intr_lock);

				/* call the actual callback */
				active_cb.cb_fn(active_cb.cb_arg);

				/* get the lock back. */
				rte_spinlock_lock(&intr_lock);
			}
		}
		/* we are done with that interrupt source, release it. */
		src->active = 0;

		rv = 0;

		/* check if any callbacks are supposed to be removed */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->pending_delete) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				if (cb->ucb_fn)
					cb->ucb_fn(src->intr_handle, cb->cb_arg);
				free(cb);
				rv++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}

		/* notify the pipe fd waited by epoll_wait to rebuild the wait list */
		if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
			rte_spinlock_unlock(&intr_lock);
			return -EPIPE;
		}

		rte_spinlock_unlock(&intr_lock);
	}

	return 0;
}

/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added in epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];
	int nfds = 0;

	for (;;) {
		nfds = epoll_wait(pfd, events, totalfds,
			EAL_INTR_EPOLL_WAIT_FOREVER);
		/* epoll_wait failed */
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			RTE_LOG(ERR, EAL,
				"epoll_wait returns with fail\n");
			return;
		}
		/* epoll_wait timeout, will never happen here */
		else if (nfds == 0)
			continue;
		/* epoll_wait has at least one fd ready to read */
		if (eal_intr_process_interrupts(events, nfds) < 0)
			return;
	}
}

/**
 * It builds/rebuilds up the epoll file descriptor with all the
 * file descriptors being waited on. Then handles the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never return;
 */
static __rte_noreturn void *
eal_intr_thread_main(__rte_unused void *arg)
{
	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add pipe fd into wait list, this pipe is used to
		 * rebuild the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		TAILQ_FOREACH(src, &intr_sources, next) {
			struct epoll_event ev;

			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			memset(&ev, 0, sizeof(ev));
			ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			ev.data.fd = rte_intr_fd_get(src->intr_handle);

			/**
			 * add all the uio device file descriptors
			 * into wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					rte_intr_fd_get(src->intr_handle), &ev) < 0) {
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					rte_intr_fd_get(src->intr_handle),
					strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);
		/* serve the interrupt */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}

int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which will be waited on by epoll and notified to
	 * rebuild the wait list of epoll.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");
	}

	return ret;
}

static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
	union rte_intr_read_buffer buf;
	int bytes_read = 0;
	int nbytes;

	switch (rte_intr_type_get(intr_handle)) {
	case RTE_INTR_HANDLE_UIO:
	case RTE_INTR_HANDLE_UIO_INTX:
		bytes_read = sizeof(buf.uio_intr_count);
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		bytes_read = sizeof(buf.vfio_intr_count);
		break;
#endif
	case RTE_INTR_HANDLE_VDEV:
		bytes_read = rte_intr_efd_counter_size_get(intr_handle);
		/* For vdev, number of bytes to read is set by driver */
		break;
	case RTE_INTR_HANDLE_EXT:
		return;
	default:
		bytes_read = 1;
		RTE_LOG(INFO, EAL, "unexpected intr type\n");
		break;
	}

	/**
	 * read out to clear the ready-to-be-read flag
	 * for epoll_wait.
	 */
	if (bytes_read == 0)
		return;
	do {
		nbytes = read(fd, &buf, bytes_read);
		if (nbytes < 0) {
			if (errno == EINTR || errno == EWOULDBLOCK ||
			    errno == EAGAIN)
				continue;
			RTE_LOG(ERR, EAL,
				"Error reading from fd %d: %s\n",
				fd, strerror(errno));
		} else if (nbytes == 0)
			RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
		return;
	} while (1);
}

static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
			struct rte_epoll_event *events)
{
	unsigned int i, count = 0;
	struct rte_epoll_event *rev;
	uint32_t valid_status;

	for (i = 0; i < n; i++) {
		rev = evs[i].data.ptr;
		valid_status = RTE_EPOLL_VALID;
		/* ACQUIRE memory ordering here pairs with RELEASE
		 * ordering below acting as a lock to synchronize
		 * the event data updating.
		 */
		if (!rev || !__atomic_compare_exchange_n(&rev->status,
				&valid_status, RTE_EPOLL_EXEC, 0,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			continue;

		events[count].status = RTE_EPOLL_VALID;
		events[count].fd = rev->fd;
		events[count].epfd = rev->epfd;
		events[count].epdata.event = evs[i].events;
		events[count].epdata.data = rev->epdata.data;
		if (rev->epdata.cb_fun)
			rev->epdata.cb_fun(rev->fd,
					rev->epdata.cb_arg);

		/* the status update should be observed after
		 * the other fields change.
		 */
		__atomic_store_n(&rev->status, RTE_EPOLL_VALID,
				__ATOMIC_RELEASE);
		count++;
	}
	return count;
}

static inline int
eal_init_tls_epfd(void)
{
	int pfd = epoll_create(255);

	if (pfd < 0) {
		RTE_LOG(ERR, EAL,
			"Cannot create epoll instance\n");
		return -1;
	}
	return pfd;
}

int
rte_intr_tls_epfd(void)
{
	if (RTE_PER_LCORE(_epfd) == -1)
		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

	return RTE_PER_LCORE(_epfd);
}

static int
eal_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout, bool interruptible)
{
	struct epoll_event evs[maxevents];
	int rc;

	if (!events) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	while (1) {
		rc = epoll_wait(epfd, evs, maxevents, timeout);
		if (likely(rc > 0)) {
			/* epoll_wait has at least one fd ready to read */
			rc = eal_epoll_process_event(evs, rc, events);
			break;
		} else if (rc < 0) {
			if (errno == EINTR) {
				if (interruptible)
					return -1;
				else
					continue;
			}
			/* epoll_wait failed */
			RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
				strerror(errno));
			rc = -1;
			break;
		} else {
			/* rc == 0, epoll_wait timed out */
			break;
		}
	}

	return rc;
}

int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, false);
}

int
rte_epoll_wait_interruptible(int epfd, struct rte_epoll_event *events,
			     int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, true);
}
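
/*
 * Illustrative usage sketch (not part of EAL; handle_event() and the buffer
 * size/timeout values are hypothetical): waiting on the per-thread epoll
 * instance for events previously added with rte_epoll_ctl() or
 * rte_intr_rx_ctl():
 *
 *	struct rte_epoll_event ev[8];
 *	int n, i;
 *
 *	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, ev, 8, 100);
 *	for (i = 0; i < n; i++)
 *		handle_event(ev[i].fd, ev[i].epdata.data);
 *
 * rte_epoll_wait() retries on EINTR; rte_epoll_wait_interruptible() returns
 * -1 instead when the caller wants to observe the interruption.
 */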
static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
	uint32_t valid_status = RTE_EPOLL_VALID;

	while (!__atomic_compare_exchange_n(&ev->status, &valid_status,
			RTE_EPOLL_INVALID, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		while (__atomic_load_n(&ev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_VALID)
			rte_pause();
		valid_status = RTE_EPOLL_VALID;
	}
	memset(&ev->epdata, 0, sizeof(ev->epdata));
	ev->fd = -1;
	ev->epfd = -1;
}

int
rte_epoll_ctl(int epfd, int op, int fd,
	      struct rte_epoll_event *event)
{
	struct epoll_event ev;

	if (!event) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	if (op == EPOLL_CTL_ADD) {
		__atomic_store_n(&event->status, RTE_EPOLL_VALID,
				__ATOMIC_RELAXED);
		event->fd = fd;  /* ignore fd in event */
		event->epfd = epfd;
		ev.data.ptr = (void *)event;
	}

	ev.events = event->epdata.event;
	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
		RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
			op, fd, strerror(errno));
		if (op == EPOLL_CTL_ADD)
			/* rollback status when CTL_ADD fails */
			__atomic_store_n(&event->status, RTE_EPOLL_INVALID,
					__ATOMIC_RELAXED);
		return -1;
	}

	if (op == EPOLL_CTL_DEL && __atomic_load_n(&event->status,
			__ATOMIC_RELAXED) != RTE_EPOLL_INVALID)
		eal_epoll_data_safe_free(event);

	return 0;
}
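
/*
 * Illustrative usage sketch (not part of EAL; my_fd is a hypothetical
 * application-owned descriptor): adding an fd to the per-thread epoll
 * instance. The rte_epoll_event must stay valid for as long as it is
 * registered:
 *
 *	static struct rte_epoll_event my_ev;
 *
 *	my_ev.epdata.event = EPOLLIN;
 *	my_ev.epdata.data = NULL;
 *	rte_epoll_ctl(RTE_EPOLL_PER_THREAD, EPOLL_CTL_ADD, my_fd, &my_ev);
 *
 * rte_intr_rx_ctl() below performs the equivalent registration for a queue
 * interrupt vector, wiring eal_intr_proc_rxtx_intr() in as the per-event
 * callback.
 */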
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	struct rte_epoll_event *rev;
	struct rte_epoll_data *epdata;
	int epfd_op;
	unsigned int efd_idx;
	int rc = 0;

	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

	if (intr_handle == NULL || rte_intr_nb_efd_get(intr_handle) == 0 ||
			efd_idx >= (unsigned int)rte_intr_nb_efd_get(intr_handle)) {
		RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
		return -EPERM;
	}

	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event already been added.\n");
			return -EEXIST;
		}

		/* attach to intr vector fd */
		epdata = &rev->epdata;
		epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op,
			rte_intr_efds_index_get(intr_handle, efd_idx), rev);
		if (!rc)
			RTE_LOG(DEBUG, EAL,
				"efd %d associated with vec %d added on epfd %d"
				"\n", rev->fd, vec, epfd);
		else
			rc = -EPERM;
		break;
	case RTE_INTR_EVENT_DEL:
		epfd_op = EPOLL_CTL_DEL;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event does not exist.\n");
			return -EPERM;
		}

		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
		if (rc)
			rc = -EPERM;
		break;
	default:
		RTE_LOG(ERR, EAL, "event op type mismatch\n");
		rc = -EPERM;
	}

	return rc;
}

void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
	uint32_t i;
	struct rte_epoll_event *rev;

	for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++) {
		rev = rte_intr_elist_index_get(intr_handle, i);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID)
			continue;
		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
			/* force free if the entry is valid */
			eal_epoll_data_safe_free(rev);
		}
	}
}

int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
	uint32_t i;
	int fd;
	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	assert(nb_efd != 0);

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) {
		for (i = 0; i < n; i++) {
			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
			if (fd < 0) {
				RTE_LOG(ERR, EAL,
					"can't setup eventfd, error %i (%s)\n",
					errno, strerror(errno));
				return -errno;
			}

			if (rte_intr_efds_index_set(intr_handle, i, fd))
				return -rte_errno;
		}

		if (rte_intr_nb_efd_set(intr_handle, n))
			return -rte_errno;

		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR + n))
			return -rte_errno;
	} else if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		/* only check, initialization would be done in vdev driver. */
		if ((uint64_t)rte_intr_efd_counter_size_get(intr_handle) >
				sizeof(union rte_intr_read_buffer)) {
			RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
			return -EINVAL;
		}
	} else {
		if (rte_intr_efds_index_set(intr_handle, 0, rte_intr_fd_get(intr_handle)))
			return -rte_errno;
		if (rte_intr_nb_efd_set(intr_handle, RTE_MIN(nb_efd, 1U)))
			return -rte_errno;
		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR))
			return -rte_errno;
	}

	return 0;
}

void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
	uint32_t i;

	rte_intr_free_epoll_fd(intr_handle);
	if (rte_intr_max_intr_get(intr_handle) > rte_intr_nb_efd_get(intr_handle)) {
		for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++)
			close(rte_intr_efds_index_get(intr_handle, i));
	}
	rte_intr_nb_efd_set(intr_handle, 0);
	rte_intr_max_intr_set(intr_handle, 0);
}

int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
	return !(!rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
	if (!rte_intr_dp_is_en(intr_handle))
		return 1;
	else
		return !!(rte_intr_max_intr_get(intr_handle) -
				rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX)
		return 1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 1;

	return 0;
}

int rte_thread_is_intr(void)
{
	return pthread_equal(intr_thread, pthread_self());
}
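
/*
 * Illustrative end-to-end sketch (not part of EAL; error handling omitted
 * and nb_rx_queues is hypothetical): how a driver or the ethdev layer would
 * typically combine the APIs above for Rx queue interrupts:
 *
 *	if (rte_intr_cap_multiple(intr_handle))
 *		rte_intr_efd_enable(intr_handle, nb_rx_queues);
 *
 *	// map queue 0's event fd onto the calling thread's epoll instance
 *	rte_intr_rx_ctl(intr_handle, RTE_EPOLL_PER_THREAD, RTE_INTR_EVENT_ADD,
 *			RTE_INTR_VEC_RXTX_OFFSET + 0, NULL);
 *
 *	// a data-path thread can then block in rte_epoll_wait() until the
 *	// device signals the queue's eventfd
 *
 * rte_intr_efd_disable() tears this down again: it removes the events from
 * epoll via rte_intr_free_epoll_fd() and closes the eventfds it created.
 */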