/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/epoll.h>
#include <sys/signalfd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_memory.h>
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_debug.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <rte_spinlock.h>
#include <rte_pause.h>
#include <rte_vfio.h>
#include <rte_eal_trace.h>

#include "eal_private.h"
#include "eal_vfio.h"
#include "eal_thread.h"

#define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
#define NB_OTHER_INTR 1

static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */

/**
 * union for pipe fds.
 */
union intr_pipefds {
	struct {
		int pipefd[2];
	};
	struct {
		int readfd;
		int writefd;
	};
};

/**
 * union buffer for reading on different devices
 */
union rte_intr_read_buffer {
	int uio_intr_count;       /* for uio device */
#ifdef VFIO_PRESENT
	uint64_t vfio_intr_count; /* for vfio device */
#endif
	uint64_t timerfd_num;     /* for timerfd */
	char charbuf[16];         /* for others */
};

TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
TAILQ_HEAD(rte_intr_source_list, rte_intr_source);

struct rte_intr_callback {
	TAILQ_ENTRY(rte_intr_callback) next;
	rte_intr_callback_fn cb_fn;  /**< callback address */
	void *cb_arg;                /**< parameter for callback */
	uint8_t pending_delete;      /**< delete after callback is called */
	rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
};

struct rte_intr_source {
	TAILQ_ENTRY(rte_intr_source) next;
	struct rte_intr_handle *intr_handle; /**< interrupt handle */
	struct rte_intr_cb_list callbacks;   /**< user callbacks */
	uint32_t active;
};

/* global spinlock for interrupt data operation */
static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;

/* union buffer for pipe read/write */
static union intr_pipefds intr_pipe;

/* interrupt sources list */
static struct rte_intr_source_list intr_sources;

/* interrupt handling thread */
static pthread_t intr_thread;

/* VFIO interrupts */
#ifdef VFIO_PRESENT

#define IRQ_SET_BUF_LEN  (sizeof(struct vfio_irq_set) + sizeof(int))
/* irq set buffer length for queue interrupts and LSC interrupt */
#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
			      sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
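/*
 * VFIO_DEVICE_SET_IRQS takes a variable-length struct vfio_irq_set: the fixed
 * header is followed by a per-vector payload (an array of eventfds when
 * VFIO_IRQ_SET_DATA_EVENTFD is used). The buffers above are sized for a
 * single eventfd (INTx/MSI/req notifier) and for up to
 * RTE_MAX_RXTX_INTR_VEC_ID + 1 eventfds (MSI-X), respectively.
 */
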
/* enable legacy (INTx) interrupts */
static int
vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;
	int *fd_ptr;

	len = sizeof(irq_set_buf);

	/* enable INTx */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	/* unmask INTx after enabling */
	memset(irq_set, 0, len);
	len = sizeof(struct vfio_irq_set);
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* disable legacy (INTx) interrupts */
static int
vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	/* mask interrupts before disabling */
	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	/* disable INTx */
	memset(irq_set, 0, len);
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set->start = 0;

	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error disabling INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* unmask/ack legacy (INTx) interrupts */
static int
vfio_ack_intx(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set irq_set;
	int vfio_dev_fd;

	/* unmask INTx */
	memset(&irq_set, 0, sizeof(irq_set));
	irq_set.argsz = sizeof(irq_set);
	irq_set.count = 1;
	irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
	irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
	irq_set.start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
		RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}
/* enable MSI interrupts */
static int
vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}
	return 0;
}

/* disable MSI interrupts */
static int
vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling MSI interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}

/* enable MSI-X interrupts */
static int
vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
	int len, ret;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd, i;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	/* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
	irq_set->count = rte_intr_max_intr_get(intr_handle) ?
		(rte_intr_max_intr_get(intr_handle) >
		 RTE_MAX_RXTX_INTR_VEC_ID + 1 ? RTE_MAX_RXTX_INTR_VEC_ID + 1 :
		 rte_intr_max_intr_get(intr_handle)) : 1;

	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	/* INTR vector offset 0 is reserved for the non-efd mapping */
	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(intr_handle);
	for (i = 0; i < rte_intr_nb_efd_get(intr_handle); i++) {
		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] =
			rte_intr_efds_index_get(intr_handle, i);
	}

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable MSI-X interrupts */
static int
vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
	struct vfio_irq_set *irq_set;
	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling MSI-X interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
/* enable req notifier */
static int
vfio_enable_req(const struct rte_intr_handle *intr_handle)
{
	int len, ret;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	struct vfio_irq_set *irq_set;
	int *fd_ptr, vfio_dev_fd;

	len = sizeof(irq_set_buf);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 1;
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;
	fd_ptr = (int *) &irq_set->data;
	*fd_ptr = rte_intr_fd_get(intr_handle);

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret) {
		RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}

/* disable req notifier */
static int
vfio_disable_req(const struct rte_intr_handle *intr_handle)
{
	struct vfio_irq_set *irq_set;
	char irq_set_buf[IRQ_SET_BUF_LEN];
	int len, ret, vfio_dev_fd;

	len = sizeof(struct vfio_irq_set);

	irq_set = (struct vfio_irq_set *) irq_set_buf;
	irq_set->argsz = len;
	irq_set->count = 0;
	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
	irq_set->start = 0;

	vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
	ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);

	if (ret)
		RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
			rte_intr_fd_get(intr_handle));

	return ret;
}
#endif
#endif

static int
uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			uio_cfg_fd);
		return -1;
	}
	/* disable interrupts */
	command_high |= 0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error disabling interrupts for fd %d\n",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
	unsigned char command_high;
	int uio_cfg_fd;

	/* use UIO config file descriptor for uio_pci_generic */
	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error reading interrupts status for fd %d\n",
			uio_cfg_fd);
		return -1;
	}
	/* enable interrupts */
	command_high &= ~0x4;
	if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
		RTE_LOG(ERR, EAL,
			"Error enabling interrupts for fd %d\n",
			uio_cfg_fd);
		return -1;
	}

	return 0;
}

static int
uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
	const int value = 0;

	if (rte_intr_fd_get(intr_handle) < 0 ||
	    write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL, "Error disabling interrupts for fd %d (%s)\n",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}

static int
uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
	const int value = 1;

	if (rte_intr_fd_get(intr_handle) < 0 ||
	    write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
		RTE_LOG(ERR, EAL, "Error enabling interrupts for fd %d (%s)\n",
			rte_intr_fd_get(intr_handle), strerror(errno));
		return -1;
	}
	return 0;
}
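/*
 * Register a callback for the interrupt source backing the given handle.
 * If this is the first callback for that fd, a new source entry is created
 * and the interrupt thread is woken up through the notification pipe so it
 * rebuilds its epoll wait list.
 */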
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb, void *cb_arg)
{
	int ret, wake_thread;
	struct rte_intr_source *src;
	struct rte_intr_callback *callback;

	wake_thread = 0;

	/* first do parameter checking */
	if (rte_intr_fd_get(intr_handle) < 0 || cb == NULL) {
		RTE_LOG(ERR, EAL, "Registering with invalid input parameter\n");
		return -EINVAL;
	}

	/* allocate a new interrupt callback entity */
	callback = calloc(1, sizeof(*callback));
	if (callback == NULL) {
		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
		return -ENOMEM;
	}
	callback->cb_fn = cb;
	callback->cb_arg = cb_arg;
	callback->pending_delete = 0;
	callback->ucb_fn = NULL;

	rte_spinlock_lock(&intr_lock);

	/* check if there is at least one callback registered for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle)) {
			/* we had no interrupts for this */
			if (TAILQ_EMPTY(&src->callbacks))
				wake_thread = 1;

			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
			ret = 0;
			break;
		}
	}

	/* no existing callbacks for this - add new source */
	if (src == NULL) {
		src = calloc(1, sizeof(*src));
		if (src == NULL) {
			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
			ret = -ENOMEM;
			free(callback);
			callback = NULL;
		} else {
			src->intr_handle = rte_intr_instance_dup(intr_handle);
			if (src->intr_handle == NULL) {
				RTE_LOG(ERR, EAL, "Can not create intr instance\n");
				ret = -ENOMEM;
				free(callback);
				callback = NULL;
				free(src);
				src = NULL;
			} else {
				TAILQ_INIT(&src->callbacks);
				TAILQ_INSERT_TAIL(&(src->callbacks), callback,
						  next);
				TAILQ_INSERT_TAIL(&intr_sources, src, next);
				wake_thread = 1;
				ret = 0;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/**
	 * check if we need to notify the pipe fd waited on by epoll_wait to
	 * rebuild the wait list.
	 */
	if (wake_thread)
		if (write(intr_pipe.writefd, "1", 1) < 0)
			ret = -EPIPE;

	rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
	return ret;
}
int
rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
				rte_intr_callback_fn cb_fn, void *cb_arg,
				rte_intr_unregister_callback_fn ucb_fn)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		RTE_LOG(ERR, EAL, "Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next) {
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;
	}

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* only usable if the source is active */
	} else if (src->active == 0) {
		ret = -EAGAIN;

	} else {
		ret = 0;

		/* walk through the callbacks and mark all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				cb->pending_delete = 1;
				cb->ucb_fn = ucb_fn;
				ret++;
			}
		}
	}

	rte_spinlock_unlock(&intr_lock);

	return ret;
}

int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;

	/* do parameter checking first */
	if (rte_intr_fd_get(intr_handle) < 0) {
		RTE_LOG(ERR, EAL, "Unregistering with invalid input parameter\n");
		return -EINVAL;
	}

	rte_spinlock_lock(&intr_lock);

	/* check if an interrupt source exists for the fd */
	TAILQ_FOREACH(src, &intr_sources, next)
		if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
			break;

	/* No interrupt source registered for the fd */
	if (src == NULL) {
		ret = -ENOENT;

	/* interrupt source has some active callbacks right now. */
	} else if (src->active != 0) {
		ret = -EAGAIN;

	/* ok to remove. */
	} else {
		ret = 0;

		/* walk through the callbacks and remove all that match. */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {

			next = TAILQ_NEXT(cb, next);

			if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
					cb->cb_arg == cb_arg)) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				free(cb);
				ret++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}
	}

	rte_spinlock_unlock(&intr_lock);

	/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
	if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
		ret = -EPIPE;
	}

	rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
		ret);
	return ret;
}

int
rte_intr_callback_unregister_sync(const struct rte_intr_handle *intr_handle,
			rte_intr_callback_fn cb_fn, void *cb_arg)
{
	int ret = 0;

	while ((ret = rte_intr_callback_unregister(intr_handle, cb_fn, cb_arg)) == -EAGAIN)
		rte_pause();

	return ret;
}
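/*
 * Enable the interrupt for the given handle by programming the backing
 * kernel interface: a write to the UIO fd, the UIO PCI config space, or the
 * appropriate VFIO_DEVICE_SET_IRQS index (INTx, MSI, MSI-X, req notifier).
 * Virtual device (VDEV) handles have no fd to program and always succeed.
 */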
int
rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to enable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_enable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_enable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_enable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_enable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_enable(intr_handle, rc);
	return rc;
}

/**
 * PMDs generally call this function at the end of their IRQ callback.
 * Internally, it unmasks the interrupt if possible.
 *
 * For INTx, unmasking is required because the interrupt is auto-masked
 * before the callback is invoked.
 *
 * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
 * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
 * this function is a no-op.
 */
int
rte_intr_ack(const struct rte_intr_handle *intr_handle)
{
	int uio_cfg_fd;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 0;

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0)
		return -1;

	switch (rte_intr_type_get(intr_handle)) {
	/* Both acking and enabling are the same for UIO */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_enable(intr_handle))
			return -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_enable(intr_handle))
			return -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		return -1;
#ifdef VFIO_PRESENT
	/* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
		return 0;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_ack_intx(intr_handle))
			return -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		return -1;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		return -1;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		return -1;
	}

	return 0;
}
int
rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
	int rc = 0, uio_cfg_fd;

	if (intr_handle == NULL)
		return -1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		rc = 0;
		goto out;
	}

	uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
	if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
		rc = -1;
		goto out;
	}

	switch (rte_intr_type_get(intr_handle)) {
	/* write to the uio fd to disable the interrupt */
	case RTE_INTR_HANDLE_UIO:
		if (uio_intr_disable(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_UIO_INTX:
		if (uio_intx_intr_disable(intr_handle))
			rc = -1;
		break;
	/* not used at this moment */
	case RTE_INTR_HANDLE_ALARM:
		rc = -1;
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
		if (vfio_disable_msix(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_MSI:
		if (vfio_disable_msi(intr_handle))
			rc = -1;
		break;
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		if (vfio_disable_intx(intr_handle))
			rc = -1;
		break;
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
	case RTE_INTR_HANDLE_VFIO_REQ:
		if (vfio_disable_req(intr_handle))
			rc = -1;
		break;
#endif
#endif
	/* not used at this moment */
	case RTE_INTR_HANDLE_DEV_EVENT:
		rc = -1;
		break;
	/* unknown handle type */
	default:
		RTE_LOG(ERR, EAL, "Unknown handle type of fd %d\n",
			rte_intr_fd_get(intr_handle));
		rc = -1;
		break;
	}
out:
	rte_eal_trace_intr_disable(intr_handle, rc);
	return rc;
}
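/*
 * Process the fds reported ready by epoll_wait: drain each source's fd,
 * invoke all registered callbacks (dropping the lock around each call), and
 * apply any deferred callback removals. Returns a negative value when the
 * wait list must be rebuilt (pipe notification or a dead fd), 0 otherwise.
 */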
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
	bool call = false;
	int n, bytes_read, rv;
	struct rte_intr_source *src;
	struct rte_intr_callback *cb, *next;
	union rte_intr_read_buffer buf;
	struct rte_intr_callback active_cb;

	for (n = 0; n < nfds; n++) {

		/**
		 * if the pipe fd is ready to read, return out to
		 * rebuild the wait list.
		 */
		if (events[n].data.fd == intr_pipe.readfd) {
			int r = read(intr_pipe.readfd, buf.charbuf,
					sizeof(buf.charbuf));
			RTE_SET_USED(r);
			return -1;
		}
		rte_spinlock_lock(&intr_lock);
		TAILQ_FOREACH(src, &intr_sources, next)
			if (rte_intr_fd_get(src->intr_handle) == events[n].data.fd)
				break;
		if (src == NULL) {
			rte_spinlock_unlock(&intr_lock);
			continue;
		}

		/* mark this interrupt source as active and release the lock. */
		src->active = 1;
		rte_spinlock_unlock(&intr_lock);

		/* set the length to be read for the different handle types */
		switch (rte_intr_type_get(src->intr_handle)) {
		case RTE_INTR_HANDLE_UIO:
		case RTE_INTR_HANDLE_UIO_INTX:
			bytes_read = sizeof(buf.uio_intr_count);
			break;
		case RTE_INTR_HANDLE_ALARM:
			bytes_read = sizeof(buf.timerfd_num);
			break;
#ifdef VFIO_PRESENT
#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
		case RTE_INTR_HANDLE_VFIO_REQ:
#endif
		case RTE_INTR_HANDLE_VFIO_MSIX:
		case RTE_INTR_HANDLE_VFIO_MSI:
		case RTE_INTR_HANDLE_VFIO_LEGACY:
			bytes_read = sizeof(buf.vfio_intr_count);
			break;
#endif
		case RTE_INTR_HANDLE_VDEV:
		case RTE_INTR_HANDLE_EXT:
			bytes_read = 0;
			call = true;
			break;
		case RTE_INTR_HANDLE_DEV_EVENT:
			bytes_read = 0;
			call = true;
			break;
		default:
			bytes_read = 1;
			break;
		}

		if (bytes_read > 0) {
			/**
			 * read out to clear the ready-to-be-read flag
			 * for epoll_wait.
			 */
			bytes_read = read(events[n].data.fd, &buf, bytes_read);
			if (bytes_read < 0) {
				if (errno == EINTR || errno == EWOULDBLOCK)
					continue;

				RTE_LOG(ERR, EAL, "Error reading from file "
					"descriptor %d: %s\n",
					events[n].data.fd,
					strerror(errno));
				/*
				 * The device is unplugged or buggy, remove
				 * it as an interrupt source and return to
				 * force the wait list to be rebuilt.
				 */
				rte_spinlock_lock(&intr_lock);
				TAILQ_REMOVE(&intr_sources, src, next);
				rte_spinlock_unlock(&intr_lock);

				for (cb = TAILQ_FIRST(&src->callbacks); cb;
							cb = next) {
					next = TAILQ_NEXT(cb, next);
					TAILQ_REMOVE(&src->callbacks, cb, next);
					free(cb);
				}
				rte_intr_instance_free(src->intr_handle);
				free(src);
				return -1;
			} else if (bytes_read == 0)
				RTE_LOG(ERR, EAL, "Read nothing from file "
					"descriptor %d\n", events[n].data.fd);
			else
				call = true;
		}

		/* grab the lock again to call callbacks and update status. */
		rte_spinlock_lock(&intr_lock);

		if (call) {

			/* Finally, call all callbacks. */
			TAILQ_FOREACH(cb, &src->callbacks, next) {

				/* make a copy and unlock. */
				active_cb = *cb;
				rte_spinlock_unlock(&intr_lock);

				/* call the actual callback */
				active_cb.cb_fn(active_cb.cb_arg);

				/* get the lock back. */
				rte_spinlock_lock(&intr_lock);
			}
		}
		/* we are done with this interrupt source, release it. */
		src->active = 0;

		rv = 0;

		/* check if any callbacks are supposed to be removed */
		for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
			next = TAILQ_NEXT(cb, next);
			if (cb->pending_delete) {
				TAILQ_REMOVE(&src->callbacks, cb, next);
				if (cb->ucb_fn)
					cb->ucb_fn(src->intr_handle, cb->cb_arg);
				free(cb);
				rv++;
			}
		}

		/* all callbacks for that source are removed. */
		if (TAILQ_EMPTY(&src->callbacks)) {
			TAILQ_REMOVE(&intr_sources, src, next);
			rte_intr_instance_free(src->intr_handle);
			free(src);
		}

		/* notify the pipe fd waited on by epoll_wait to rebuild the wait list */
		if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
			rte_spinlock_unlock(&intr_lock);
			return -EPIPE;
		}

		rte_spinlock_unlock(&intr_lock);
	}

	return 0;
}

/**
 * It handles all the interrupts.
 *
 * @param pfd
 *  epoll file descriptor.
 * @param totalfds
 *  The number of file descriptors added to epoll.
 *
 * @return
 *  void
 */
static void
eal_intr_handle_interrupts(int pfd, unsigned totalfds)
{
	struct epoll_event events[totalfds];
	int nfds = 0;

	for (;;) {
		nfds = epoll_wait(pfd, events, totalfds,
			EAL_INTR_EPOLL_WAIT_FOREVER);
		/* epoll_wait failed */
		if (nfds < 0) {
			if (errno == EINTR)
				continue;
			RTE_LOG(ERR, EAL,
				"epoll_wait returns with fail\n");
			return;
		}
		/* epoll_wait timed out, which will never happen here */
		else if (nfds == 0)
			continue;
		/* epoll_wait has at least one fd ready to read */
		if (eal_intr_process_interrupts(events, nfds) < 0)
			return;
	}
}
/**
 * Build (and rebuild) the epoll file descriptor with all the
 * file descriptors being waited on, then handle the interrupts.
 *
 * @param arg
 *  pointer. (unused)
 *
 * @return
 *  never returns
 */
static __rte_noreturn void *
eal_intr_thread_main(__rte_unused void *arg)
{
	/* host thread, never break out */
	for (;;) {
		/* build up the epoll fd with all descriptors we are to
		 * wait on then pass it to the handle_interrupts function
		 */
		static struct epoll_event pipe_event = {
			.events = EPOLLIN | EPOLLPRI,
		};
		struct rte_intr_source *src;
		unsigned numfds = 0;

		/* create epoll fd */
		int pfd = epoll_create(1);
		if (pfd < 0)
			rte_panic("Cannot create epoll instance\n");

		pipe_event.data.fd = intr_pipe.readfd;
		/**
		 * add the pipe fd into the wait list; this pipe is used to
		 * trigger a rebuild of the wait list.
		 */
		if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
						&pipe_event) < 0) {
			rte_panic("Error adding fd to %d epoll_ctl, %s\n",
					intr_pipe.readfd, strerror(errno));
		}
		numfds++;

		rte_spinlock_lock(&intr_lock);

		TAILQ_FOREACH(src, &intr_sources, next) {
			struct epoll_event ev;

			if (src->callbacks.tqh_first == NULL)
				continue; /* skip those with no callbacks */
			memset(&ev, 0, sizeof(ev));
			ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
			ev.data.fd = rte_intr_fd_get(src->intr_handle);

			/**
			 * add all the device file descriptors
			 * into the wait list.
			 */
			if (epoll_ctl(pfd, EPOLL_CTL_ADD,
					rte_intr_fd_get(src->intr_handle), &ev) < 0) {
				rte_panic("Error adding fd %d epoll_ctl, %s\n",
					rte_intr_fd_get(src->intr_handle),
					strerror(errno));
			}
			else
				numfds++;
		}
		rte_spinlock_unlock(&intr_lock);
		/* serve the interrupt */
		eal_intr_handle_interrupts(pfd, numfds);

		/**
		 * when we return, we need to rebuild the
		 * list of fds to monitor.
		 */
		close(pfd);
	}
}

int
rte_eal_intr_init(void)
{
	int ret = 0;

	/* init the global interrupt source head */
	TAILQ_INIT(&intr_sources);

	/**
	 * create a pipe which is waited on by epoll and written to in order
	 * to notify the interrupt thread to rebuild its wait list.
	 */
	if (pipe(intr_pipe.pipefd) < 0) {
		rte_errno = errno;
		return -1;
	}

	/* create the host thread to wait/handle the interrupt */
	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
			eal_intr_thread_main, NULL);
	if (ret != 0) {
		rte_errno = -ret;
		RTE_LOG(ERR, EAL,
			"Failed to create thread for interrupt handling\n");
	}

	return ret;
}
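/*
 * Drain the Rx/Tx interrupt event fd so it is re-armed for the next
 * epoll_wait. The number of bytes to read depends on the handle type
 * (eventfd counter for VFIO, 32-bit count for UIO, driver-defined size
 * for VDEV).
 */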
static void
eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
{
	union rte_intr_read_buffer buf;
	int bytes_read = 0;
	int nbytes;

	switch (rte_intr_type_get(intr_handle)) {
	case RTE_INTR_HANDLE_UIO:
	case RTE_INTR_HANDLE_UIO_INTX:
		bytes_read = sizeof(buf.uio_intr_count);
		break;
#ifdef VFIO_PRESENT
	case RTE_INTR_HANDLE_VFIO_MSIX:
	case RTE_INTR_HANDLE_VFIO_MSI:
	case RTE_INTR_HANDLE_VFIO_LEGACY:
		bytes_read = sizeof(buf.vfio_intr_count);
		break;
#endif
	case RTE_INTR_HANDLE_VDEV:
		bytes_read = rte_intr_efd_counter_size_get(intr_handle);
		/* For vdev, number of bytes to read is set by driver */
		break;
	case RTE_INTR_HANDLE_EXT:
		return;
	default:
		bytes_read = 1;
		RTE_LOG(INFO, EAL, "unexpected intr type\n");
		break;
	}

	/**
	 * read out to clear the ready-to-be-read flag
	 * for epoll_wait.
	 */
	if (bytes_read == 0)
		return;
	do {
		nbytes = read(fd, &buf, bytes_read);
		if (nbytes < 0) {
			if (errno == EINTR || errno == EWOULDBLOCK ||
			    errno == EAGAIN)
				continue;
			RTE_LOG(ERR, EAL,
				"Error reading from fd %d: %s\n",
				fd, strerror(errno));
		} else if (nbytes == 0)
			RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd);
		return;
	} while (1);
}

static int
eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
			struct rte_epoll_event *events)
{
	unsigned int i, count = 0;
	struct rte_epoll_event *rev;
	uint32_t valid_status;

	for (i = 0; i < n; i++) {
		rev = evs[i].data.ptr;
		valid_status = RTE_EPOLL_VALID;
		/* ACQUIRE memory ordering here pairs with RELEASE
		 * ordering below acting as a lock to synchronize
		 * the event data updating.
		 */
		if (!rev || !__atomic_compare_exchange_n(&rev->status,
				&valid_status, RTE_EPOLL_EXEC, 0,
				__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
			continue;

		events[count].status = RTE_EPOLL_VALID;
		events[count].fd = rev->fd;
		events[count].epfd = rev->epfd;
		events[count].epdata.event = evs[i].events;
		events[count].epdata.data = rev->epdata.data;
		if (rev->epdata.cb_fun)
			rev->epdata.cb_fun(rev->fd,
					   rev->epdata.cb_arg);

		/* the status update should be observed after
		 * the other fields change.
		 */
		__atomic_store_n(&rev->status, RTE_EPOLL_VALID,
				__ATOMIC_RELEASE);
		count++;
	}
	return count;
}

static inline int
eal_init_tls_epfd(void)
{
	int pfd = epoll_create(255);

	if (pfd < 0) {
		RTE_LOG(ERR, EAL,
			"Cannot create epoll instance\n");
		return -1;
	}
	return pfd;
}

int
rte_intr_tls_epfd(void)
{
	if (RTE_PER_LCORE(_epfd) == -1)
		RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();

	return RTE_PER_LCORE(_epfd);
}

static int
eal_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout, bool interruptible)
{
	struct epoll_event evs[maxevents];
	int rc;

	if (!events) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	while (1) {
		rc = epoll_wait(epfd, evs, maxevents, timeout);
		if (likely(rc > 0)) {
			/* epoll_wait has at least one fd ready to read */
			rc = eal_epoll_process_event(evs, rc, events);
			break;
		} else if (rc < 0) {
			if (errno == EINTR) {
				if (interruptible)
					return -1;
				else
					continue;
			}
			/* epoll_wait failed */
			RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n",
				strerror(errno));
			rc = -1;
			break;
		} else {
			/* rc == 0, epoll_wait timed out */
			break;
		}
	}

	return rc;
}

int
rte_epoll_wait(int epfd, struct rte_epoll_event *events,
	       int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, false);
}

int
rte_epoll_wait_interruptible(int epfd, struct rte_epoll_event *events,
			     int maxevents, int timeout)
{
	return eal_epoll_wait(epfd, events, maxevents, timeout, true);
}
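/*
 * Mark an rte_epoll_event as invalid, spinning until any callback currently
 * executing on it (status RTE_EPOLL_EXEC) has finished, so the event data
 * can be safely reset.
 */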
static inline void
eal_epoll_data_safe_free(struct rte_epoll_event *ev)
{
	uint32_t valid_status = RTE_EPOLL_VALID;

	while (!__atomic_compare_exchange_n(&ev->status, &valid_status,
		    RTE_EPOLL_INVALID, 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) {
		while (__atomic_load_n(&ev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_VALID)
			rte_pause();
		valid_status = RTE_EPOLL_VALID;
	}
	memset(&ev->epdata, 0, sizeof(ev->epdata));
	ev->fd = -1;
	ev->epfd = -1;
}

int
rte_epoll_ctl(int epfd, int op, int fd,
	      struct rte_epoll_event *event)
{
	struct epoll_event ev;

	if (!event) {
		RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n");
		return -1;
	}

	/* using per thread epoll fd */
	if (epfd == RTE_EPOLL_PER_THREAD)
		epfd = rte_intr_tls_epfd();

	if (op == EPOLL_CTL_ADD) {
		__atomic_store_n(&event->status, RTE_EPOLL_VALID,
				__ATOMIC_RELAXED);
		event->fd = fd;  /* ignore fd in event */
		event->epfd = epfd;
		ev.data.ptr = (void *)event;
	}

	ev.events = event->epdata.event;
	if (epoll_ctl(epfd, op, fd, &ev) < 0) {
		RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n",
			op, fd, strerror(errno));
		if (op == EPOLL_CTL_ADD)
			/* roll back the status when CTL_ADD fails */
			__atomic_store_n(&event->status, RTE_EPOLL_INVALID,
					__ATOMIC_RELAXED);
		return -1;
	}

	if (op == EPOLL_CTL_DEL && __atomic_load_n(&event->status,
			__ATOMIC_RELAXED) != RTE_EPOLL_INVALID)
		eal_epoll_data_safe_free(event);

	return 0;
}
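/*
 * Add or remove the eventfd of one Rx interrupt vector on an epoll instance.
 * The vector number is translated to an index into the handle's efd array;
 * on RTE_INTR_EVENT_ADD the per-event data (callback, user data) is attached
 * before the fd is registered with rte_epoll_ctl().
 */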
int
rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
		int op, unsigned int vec, void *data)
{
	struct rte_epoll_event *rev;
	struct rte_epoll_data *epdata;
	int epfd_op;
	unsigned int efd_idx;
	int rc = 0;

	efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
		(vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;

	if (intr_handle == NULL || rte_intr_nb_efd_get(intr_handle) == 0 ||
			efd_idx >= (unsigned int)rte_intr_nb_efd_get(intr_handle)) {
		RTE_LOG(ERR, EAL, "Wrong intr vector number.\n");
		return -EPERM;
	}

	switch (op) {
	case RTE_INTR_EVENT_ADD:
		epfd_op = EPOLL_CTL_ADD;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) != RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event already been added.\n");
			return -EEXIST;
		}

		/* attach to the intr vector fd */
		epdata = &rev->epdata;
		epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
		epdata->data = data;
		epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
		epdata->cb_arg = (void *)intr_handle;
		rc = rte_epoll_ctl(epfd, epfd_op,
			rte_intr_efds_index_get(intr_handle, efd_idx), rev);
		if (!rc)
			RTE_LOG(DEBUG, EAL,
				"efd %d associated with vec %d added on epfd %d\n",
				rev->fd, vec, epfd);
		else
			rc = -EPERM;
		break;
	case RTE_INTR_EVENT_DEL:
		epfd_op = EPOLL_CTL_DEL;
		rev = rte_intr_elist_index_get(intr_handle, efd_idx);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID) {
			RTE_LOG(INFO, EAL, "Event does not exist.\n");
			return -EPERM;
		}

		rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
		if (rc)
			rc = -EPERM;
		break;
	default:
		RTE_LOG(ERR, EAL, "event op type mismatch\n");
		rc = -EPERM;
	}

	return rc;
}

void
rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
{
	uint32_t i;
	struct rte_epoll_event *rev;

	for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++) {
		rev = rte_intr_elist_index_get(intr_handle, i);
		if (__atomic_load_n(&rev->status,
				__ATOMIC_RELAXED) == RTE_EPOLL_INVALID)
			continue;
		if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
			/* force free if the entry is still valid */
			eal_epoll_data_safe_free(rev);
		}
	}
}
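/*
 * Create the per-queue event fds used for Rx interrupts. For VFIO MSI-X,
 * one eventfd is created per requested vector (capped at
 * RTE_MAX_RXTX_INTR_VEC_ID); for VDEV handles only the driver-provided
 * counter size is validated here; otherwise the handle's own fd is reused
 * as the single event fd.
 */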
int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
	uint32_t i;
	int fd;
	uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);

	assert(nb_efd != 0);

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) {
		for (i = 0; i < n; i++) {
			fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
			if (fd < 0) {
				RTE_LOG(ERR, EAL,
					"can't setup eventfd, error %i (%s)\n",
					errno, strerror(errno));
				return -errno;
			}

			if (rte_intr_efds_index_set(intr_handle, i, fd))
				return -rte_errno;
		}

		if (rte_intr_nb_efd_set(intr_handle, n))
			return -rte_errno;

		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR + n))
			return -rte_errno;
	} else if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
		/* only check, initialization would be done in vdev driver. */
		if ((uint64_t)rte_intr_efd_counter_size_get(intr_handle) >
		    sizeof(union rte_intr_read_buffer)) {
			RTE_LOG(ERR, EAL, "the efd_counter_size is oversized");
			return -EINVAL;
		}
	} else {
		if (rte_intr_efds_index_set(intr_handle, 0, rte_intr_fd_get(intr_handle)))
			return -rte_errno;
		if (rte_intr_nb_efd_set(intr_handle, RTE_MIN(nb_efd, 1U)))
			return -rte_errno;
		if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR))
			return -rte_errno;
	}

	return 0;
}

void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
	uint32_t i;

	rte_intr_free_epoll_fd(intr_handle);
	if (rte_intr_max_intr_get(intr_handle) > rte_intr_nb_efd_get(intr_handle)) {
		for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++)
			close(rte_intr_efds_index_get(intr_handle, i));
	}
	rte_intr_nb_efd_set(intr_handle, 0);
	rte_intr_max_intr_set(intr_handle, 0);
}

int
rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
{
	return !(!rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_allow_others(struct rte_intr_handle *intr_handle)
{
	if (!rte_intr_dp_is_en(intr_handle))
		return 1;
	else
		return !!(rte_intr_max_intr_get(intr_handle) -
			rte_intr_nb_efd_get(intr_handle));
}

int
rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
{
	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX)
		return 1;

	if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
		return 1;

	return 0;
}

int rte_thread_is_intr(void)
{
	return pthread_equal(intr_thread, pthread_self());
}