1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
3 */
4
5 #include <stdio.h>
6 #include <stdint.h>
7 #include <stdlib.h>
8 #include <sys/queue.h>
9 #include <unistd.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <sys/epoll.h>
13 #include <sys/ioctl.h>
14 #include <sys/eventfd.h>
15 #include <assert.h>
16 #include <stdbool.h>
17
18 #include <eal_trace_internal.h>
19 #include <rte_common.h>
20 #include <rte_interrupts.h>
21 #include <rte_thread.h>
22 #include <rte_per_lcore.h>
23 #include <rte_lcore.h>
24 #include <rte_branch_prediction.h>
25 #include <rte_debug.h>
26 #include <rte_log.h>
27 #include <rte_errno.h>
28 #include <rte_spinlock.h>
29 #include <rte_pause.h>
30 #include <rte_vfio.h>
31
32 #include "eal_private.h"
33
34 #define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
35 #define NB_OTHER_INTR 1
36
37 static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */
38
39 /**
40 * union for pipe fds.
41 */
42 union intr_pipefds{
43 struct {
44 int pipefd[2];
45 };
46 struct {
47 int readfd;
48 int writefd;
49 };
50 };
51
52 /**
53 * union buffer for reading on different devices
54 */
55 union rte_intr_read_buffer {
56 int uio_intr_count; /* for uio device */
57 #ifdef VFIO_PRESENT
58 uint64_t vfio_intr_count; /* for vfio device */
59 #endif
60 uint64_t timerfd_num; /* for timerfd */
61 char charbuf[16]; /* for others */
62 };
63
64 TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
65 TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
66
67 struct rte_intr_callback {
68 TAILQ_ENTRY(rte_intr_callback) next;
69 rte_intr_callback_fn cb_fn; /**< callback address */
70 void *cb_arg; /**< parameter for callback */
71 uint8_t pending_delete; /**< delete after callback is called */
72 rte_intr_unregister_callback_fn ucb_fn; /**< fn to call before cb is deleted */
73 };
74
75 struct rte_intr_source {
76 TAILQ_ENTRY(rte_intr_source) next;
77 struct rte_intr_handle *intr_handle; /**< interrupt handle */
78 struct rte_intr_cb_list callbacks; /**< user callbacks */
79 uint32_t active;
80 };
81
82 /* global spinlock for interrupt data operation */
83 static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
84
85 /* union buffer for pipe read/write */
86 static union intr_pipefds intr_pipe;
87
88 /* interrupt sources list */
89 static struct rte_intr_source_list intr_sources;
90
91 /* interrupt handling thread */
92 static rte_thread_t intr_thread;
93
94 /* VFIO interrupts */
95 #ifdef VFIO_PRESENT
96
97 #define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int))
98 /* irq set buffer length for queue interrupts and LSC interrupt */
99 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
100 sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1))
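/*
 * Layout sketch (illustrative): with VFIO_IRQ_SET_DATA_EVENTFD, the
 * struct vfio_irq_set header is immediately followed by one int eventfd
 * per vector, which is why the buffers above are sized as a header plus
 * an int array:
 *
 *	char buf[sizeof(struct vfio_irq_set) + sizeof(int) * nvec];
 *	struct vfio_irq_set *s = (struct vfio_irq_set *)buf;
 *	int *fds = (int *)&s->data;	// fds[0..nvec-1] carry the eventfds
 */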
101
102 /* enable legacy (INTx) interrupts */
103 static int
104 vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
105 struct vfio_irq_set *irq_set;
106 char irq_set_buf[IRQ_SET_BUF_LEN];
107 int len, ret, vfio_dev_fd;
108 int *fd_ptr;
109
110 len = sizeof(irq_set_buf);
111
112 /* enable INTx */
113 irq_set = (struct vfio_irq_set *) irq_set_buf;
114 irq_set->argsz = len;
115 irq_set->count = 1;
116 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
117 irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
118 irq_set->start = 0;
119 fd_ptr = (int *) &irq_set->data;
120 *fd_ptr = rte_intr_fd_get(intr_handle);
121
122 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
123 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
124
125 if (ret) {
126 EAL_LOG(ERR, "Error enabling INTx interrupts for fd %d",
127 rte_intr_fd_get(intr_handle));
128 return -1;
129 }
130
131 /* unmask INTx after enabling */
132 memset(irq_set, 0, len);
133 len = sizeof(struct vfio_irq_set);
134 irq_set->argsz = len;
135 irq_set->count = 1;
136 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
137 irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
138 irq_set->start = 0;
139
140 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
141
142 if (ret) {
143 EAL_LOG(ERR, "Error unmasking INTx interrupts for fd %d",
144 rte_intr_fd_get(intr_handle));
145 return -1;
146 }
147 return 0;
148 }
149
150 /* disable legacy (INTx) interrupts */
151 static int
152 vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
153 struct vfio_irq_set *irq_set;
154 char irq_set_buf[IRQ_SET_BUF_LEN];
155 int len, ret, vfio_dev_fd;
156
157 len = sizeof(struct vfio_irq_set);
158
159 /* mask interrupts before disabling */
160 irq_set = (struct vfio_irq_set *) irq_set_buf;
161 irq_set->argsz = len;
162 irq_set->count = 1;
163 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
164 irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
165 irq_set->start = 0;
166
167 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
168 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
169
170 if (ret) {
171 EAL_LOG(ERR, "Error masking INTx interrupts for fd %d",
172 rte_intr_fd_get(intr_handle));
173 return -1;
174 }
175
176 /* disable INTx */
177 memset(irq_set, 0, len);
178 irq_set->argsz = len;
179 irq_set->count = 0;
180 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
181 irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
182 irq_set->start = 0;
183
184 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
185
186 if (ret) {
187 EAL_LOG(ERR, "Error disabling INTx interrupts for fd %d",
188 rte_intr_fd_get(intr_handle));
189 return -1;
190 }
191 return 0;
192 }
193
194 /* unmask/ack legacy (INTx) interrupts */
195 static int
196 vfio_ack_intx(const struct rte_intr_handle *intr_handle)
197 {
198 struct vfio_irq_set irq_set;
199 int vfio_dev_fd;
200
201 /* unmask INTx */
202 memset(&irq_set, 0, sizeof(irq_set));
203 irq_set.argsz = sizeof(irq_set);
204 irq_set.count = 1;
205 irq_set.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
206 irq_set.index = VFIO_PCI_INTX_IRQ_INDEX;
207 irq_set.start = 0;
208
209 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
210 if (ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, &irq_set)) {
211 EAL_LOG(ERR, "Error unmasking INTx interrupts for fd %d",
212 rte_intr_fd_get(intr_handle));
213 return -1;
214 }
215 return 0;
216 }
217
218 /* enable MSI interrupts */
219 static int
220 vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
221 int len, ret;
222 char irq_set_buf[IRQ_SET_BUF_LEN];
223 struct vfio_irq_set *irq_set;
224 int *fd_ptr, vfio_dev_fd;
225
226 len = sizeof(irq_set_buf);
227
228 irq_set = (struct vfio_irq_set *) irq_set_buf;
229 irq_set->argsz = len;
230 irq_set->count = 1;
231 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
232 irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
233 irq_set->start = 0;
234 fd_ptr = (int *) &irq_set->data;
235 *fd_ptr = rte_intr_fd_get(intr_handle);
236
237 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
238 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
239
240 if (ret) {
241 EAL_LOG(ERR, "Error enabling MSI interrupts for fd %d",
242 rte_intr_fd_get(intr_handle));
243 return -1;
244 }
245 return 0;
246 }
247
248 /* disable MSI interrupts */
249 static int
250 vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
251 struct vfio_irq_set *irq_set;
252 char irq_set_buf[IRQ_SET_BUF_LEN];
253 int len, ret, vfio_dev_fd;
254
255 len = sizeof(struct vfio_irq_set);
256
257 irq_set = (struct vfio_irq_set *) irq_set_buf;
258 irq_set->argsz = len;
259 irq_set->count = 0;
260 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
261 irq_set->index = VFIO_PCI_MSI_IRQ_INDEX;
262 irq_set->start = 0;
263
264 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
265 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
266 if (ret)
267 EAL_LOG(ERR, "Error disabling MSI interrupts for fd %d",
268 rte_intr_fd_get(intr_handle));
269
270 return ret;
271 }
272
273 /* enable MSI-X interrupts */
274 static int
275 vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
276 int len, ret;
277 char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
278 struct vfio_irq_set *irq_set;
279 int *fd_ptr, vfio_dev_fd, i;
280
281 len = sizeof(irq_set_buf);
282
283 irq_set = (struct vfio_irq_set *) irq_set_buf;
284 irq_set->argsz = len;
285 /* 0 < irq_set->count < RTE_MAX_RXTX_INTR_VEC_ID + 1 */
286 irq_set->count = rte_intr_max_intr_get(intr_handle) ?
287 (rte_intr_max_intr_get(intr_handle) >
288 RTE_MAX_RXTX_INTR_VEC_ID + 1 ? RTE_MAX_RXTX_INTR_VEC_ID + 1 :
289 rte_intr_max_intr_get(intr_handle)) : 1;
290
291 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
292 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
293 irq_set->start = 0;
294 fd_ptr = (int *) &irq_set->data;
295 /* INTR vector offset 0 is reserved for the non-efd mapping */
296 fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(intr_handle);
297 for (i = 0; i < rte_intr_nb_efd_get(intr_handle); i++) {
298 fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] =
299 rte_intr_efds_index_get(intr_handle, i);
300 }
301
302 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
303 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
304
305 if (ret) {
306 EAL_LOG(ERR, "Error enabling MSI-X interrupts for fd %d",
307 rte_intr_fd_get(intr_handle));
308 return -1;
309 }
310
311 return 0;
312 }
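/*
 * Vector-to-eventfd mapping sketch for the MSI-X case above (illustrative,
 * assuming nb_efd data-path eventfds have already been set on the handle):
 *
 *	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET]     -> rte_intr_fd_get(handle)
 *	                                        (misc/LSC interrupt)
 *	fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] -> rte_intr_efds_index_get(handle, i)
 *	                                        (Rx/Tx queue vector i)
 */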
313
314 /* disable MSI-X interrupts */
315 static int
316 vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
317 struct vfio_irq_set *irq_set;
318 char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
319 int len, ret, vfio_dev_fd;
320
321 len = sizeof(struct vfio_irq_set);
322
323 irq_set = (struct vfio_irq_set *) irq_set_buf;
324 irq_set->argsz = len;
325 irq_set->count = 0;
326 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
327 irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
328 irq_set->start = 0;
329
330 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
331 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
332
333 if (ret)
334 EAL_LOG(ERR, "Error disabling MSI-X interrupts for fd %d",
335 rte_intr_fd_get(intr_handle));
336
337 return ret;
338 }
339
340 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
341 /* enable req notifier */
342 static int
343 vfio_enable_req(const struct rte_intr_handle *intr_handle)
344 {
345 int len, ret;
346 char irq_set_buf[IRQ_SET_BUF_LEN];
347 struct vfio_irq_set *irq_set;
348 int *fd_ptr, vfio_dev_fd;
349
350 len = sizeof(irq_set_buf);
351
352 irq_set = (struct vfio_irq_set *) irq_set_buf;
353 irq_set->argsz = len;
354 irq_set->count = 1;
355 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
356 VFIO_IRQ_SET_ACTION_TRIGGER;
357 irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
358 irq_set->start = 0;
359 fd_ptr = (int *) &irq_set->data;
360 *fd_ptr = rte_intr_fd_get(intr_handle);
361
362 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
363 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
364
365 if (ret) {
366 EAL_LOG(ERR, "Error enabling req interrupts for fd %d",
367 rte_intr_fd_get(intr_handle));
368 return -1;
369 }
370
371 return 0;
372 }
373
374 /* disable req notifier */
375 static int
376 vfio_disable_req(const struct rte_intr_handle *intr_handle)
377 {
378 struct vfio_irq_set *irq_set;
379 char irq_set_buf[IRQ_SET_BUF_LEN];
380 int len, ret, vfio_dev_fd;
381
382 len = sizeof(struct vfio_irq_set);
383
384 irq_set = (struct vfio_irq_set *) irq_set_buf;
385 irq_set->argsz = len;
386 irq_set->count = 0;
387 irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
388 irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
389 irq_set->start = 0;
390
391 vfio_dev_fd = rte_intr_dev_fd_get(intr_handle);
392 ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
393
394 if (ret)
395 EAL_LOG(ERR, "Error disabling req interrupts for fd %d",
396 rte_intr_fd_get(intr_handle));
397
398 return ret;
399 }
400 #endif
401 #endif
402
403 static int
404 uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
405 {
406 unsigned char command_high;
407 int uio_cfg_fd;
408
409 /* use UIO config file descriptor for uio_pci_generic */
410 uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
411 if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
412 EAL_LOG(ERR,
413 "Error reading interrupts status for fd %d",
414 uio_cfg_fd);
415 return -1;
416 }
417 /* disable interrupts */
418 command_high |= 0x4;
419 if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
420 EAL_LOG(ERR,
421 "Error disabling interrupts for fd %d",
422 uio_cfg_fd);
423 return -1;
424 }
425
426 return 0;
427 }
428
429 static int
430 uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
431 {
432 unsigned char command_high;
433 int uio_cfg_fd;
434
435 /* use UIO config file descriptor for uio_pci_generic */
436 uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
437 if (uio_cfg_fd < 0 || pread(uio_cfg_fd, &command_high, 1, 5) != 1) {
438 EAL_LOG(ERR,
439 "Error reading interrupts status for fd %d",
440 uio_cfg_fd);
441 return -1;
442 }
443 /* enable interrupts */
444 command_high &= ~0x4;
445 if (pwrite(uio_cfg_fd, &command_high, 1, 5) != 1) {
446 EAL_LOG(ERR,
447 "Error enabling interrupts for fd %d",
448 uio_cfg_fd);
449 return -1;
450 }
451
452 return 0;
453 }
454
455 static int
456 uio_intr_disable(const struct rte_intr_handle *intr_handle)
457 {
458 const int value = 0;
459
460 if (rte_intr_fd_get(intr_handle) < 0 ||
461 write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
462 EAL_LOG(ERR, "Error disabling interrupts for fd %d (%s)",
463 rte_intr_fd_get(intr_handle), strerror(errno));
464 return -1;
465 }
466 return 0;
467 }
468
469 static int
470 uio_intr_enable(const struct rte_intr_handle *intr_handle)
471 {
472 const int value = 1;
473
474 if (rte_intr_fd_get(intr_handle) < 0 ||
475 write(rte_intr_fd_get(intr_handle), &value, sizeof(value)) < 0) {
476 EAL_LOG(ERR, "Error enabling interrupts for fd %d (%s)",
477 rte_intr_fd_get(intr_handle), strerror(errno));
478 return -1;
479 }
480 return 0;
481 }
482
483 int
484 rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
485 rte_intr_callback_fn cb, void *cb_arg)
486 {
487 int ret, wake_thread;
488 struct rte_intr_source *src;
489 struct rte_intr_callback *callback;
490
491 wake_thread = 0;
492
493 /* first do parameter checking */
494 if (rte_intr_fd_get(intr_handle) < 0 || cb == NULL) {
495 EAL_LOG(ERR, "Registering with invalid input parameter");
496 return -EINVAL;
497 }
498
499 /* allocate a new interrupt callback entity */
500 callback = calloc(1, sizeof(*callback));
501 if (callback == NULL) {
502 EAL_LOG(ERR, "Can not allocate memory");
503 return -ENOMEM;
504 }
505 callback->cb_fn = cb;
506 callback->cb_arg = cb_arg;
507 callback->pending_delete = 0;
508 callback->ucb_fn = NULL;
509
510 rte_spinlock_lock(&intr_lock);
511
512 /* check if there is at least one callback registered for the fd */
513 TAILQ_FOREACH(src, &intr_sources, next) {
514 if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle)) {
515 /* we had no interrupts for this */
516 if (TAILQ_EMPTY(&src->callbacks))
517 wake_thread = 1;
518
519 TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
520 ret = 0;
521 break;
522 }
523 }
524
525 /* no existing callbacks for this - add new source */
526 if (src == NULL) {
527 src = calloc(1, sizeof(*src));
528 if (src == NULL) {
529 EAL_LOG(ERR, "Can not allocate memory");
530 ret = -ENOMEM;
531 free(callback);
532 callback = NULL;
533 } else {
534 src->intr_handle = rte_intr_instance_dup(intr_handle);
535 if (src->intr_handle == NULL) {
536 EAL_LOG(ERR, "Can not create intr instance");
537 ret = -ENOMEM;
538 free(callback);
539 callback = NULL;
540 free(src);
541 src = NULL;
542 } else {
543 TAILQ_INIT(&src->callbacks);
544 TAILQ_INSERT_TAIL(&(src->callbacks), callback,
545 next);
546 TAILQ_INSERT_TAIL(&intr_sources, src, next);
547 wake_thread = 1;
548 ret = 0;
549 }
550 }
551 }
552
553 rte_spinlock_unlock(&intr_lock);
554
555 /**
556 * check whether we need to notify the pipe fd that epoll_wait is
557 * waiting on, so that the wait list is rebuilt.
558 */
559 if (wake_thread)
560 if (write(intr_pipe.writefd, "1", 1) < 0)
561 ret = -EPIPE;
562
563 rte_eal_trace_intr_callback_register(intr_handle, cb, cb_arg, ret);
564 return ret;
565 }
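/*
 * Usage sketch (illustrative only; my_dev and my_intr_cb are hypothetical
 * names): a driver typically registers its callback once the handle's fd is
 * valid, and the interrupt thread then invokes it on every event.
 *
 *	static void my_intr_cb(void *arg)
 *	{
 *		struct my_dev *dev = arg;
 *		// handle the event, then re-arm the interrupt if needed
 *		rte_intr_ack(dev->intr_handle);
 *	}
 *
 *	if (rte_intr_callback_register(dev->intr_handle, my_intr_cb, dev) < 0)
 *		// handle -EINVAL / -ENOMEM / -EPIPE
 */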
566
567 int
568 rte_intr_callback_unregister_pending(const struct rte_intr_handle *intr_handle,
569 rte_intr_callback_fn cb_fn, void *cb_arg,
570 rte_intr_unregister_callback_fn ucb_fn)
571 {
572 int ret;
573 struct rte_intr_source *src;
574 struct rte_intr_callback *cb, *next;
575
576 /* do parameter checking first */
577 if (rte_intr_fd_get(intr_handle) < 0) {
578 EAL_LOG(ERR, "Unregistering with invalid input parameter");
579 return -EINVAL;
580 }
581
582 rte_spinlock_lock(&intr_lock);
583
584 /* check if an interrupt source exists for the fd */
585 TAILQ_FOREACH(src, &intr_sources, next) {
586 if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
587 break;
588 }
589
590 /* No interrupt source registered for the fd */
591 if (src == NULL) {
592 ret = -ENOENT;
593
594 /* only usable if the source is active */
595 } else if (src->active == 0) {
596 ret = -EAGAIN;
597
598 } else {
599 ret = 0;
600
601 /* walk through the callbacks and mark all that match. */
602 for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
603 next = TAILQ_NEXT(cb, next);
604 if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
605 cb->cb_arg == cb_arg)) {
606 cb->pending_delete = 1;
607 cb->ucb_fn = ucb_fn;
608 ret++;
609 }
610 }
611 }
612
613 rte_spinlock_unlock(&intr_lock);
614
615 return ret;
616 }
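/*
 * Intended-use sketch (illustrative): a callback cannot remove itself with
 * rte_intr_callback_unregister() because its source is marked active while it
 * runs, so it schedules its own removal and lets the interrupt thread free it
 * later, invoking the optional ucb_fn first.
 *
 *	static void my_release(struct rte_intr_handle *h, void *arg)
 *	{
 *		free(arg);	// hypothetical cleanup of the callback argument
 *	}
 *
 *	static void my_intr_cb(void *arg)
 *	{
 *		// ... work ...
 *		rte_intr_callback_unregister_pending(handle, my_intr_cb,
 *						     arg, my_release);
 *	}
 */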
617
618 int
619 rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
620 rte_intr_callback_fn cb_fn, void *cb_arg)
621 {
622 int ret;
623 struct rte_intr_source *src;
624 struct rte_intr_callback *cb, *next;
625
626 /* do parameter checking first */
627 if (rte_intr_fd_get(intr_handle) < 0) {
628 EAL_LOG(ERR, "Unregistering with invalid input parameter");
629 return -EINVAL;
630 }
631
632 rte_spinlock_lock(&intr_lock);
633
634 /* check if an interrupt source exists for the fd */
635 TAILQ_FOREACH(src, &intr_sources, next)
636 if (rte_intr_fd_get(src->intr_handle) == rte_intr_fd_get(intr_handle))
637 break;
638
639 /* No interrupt source registered for the fd */
640 if (src == NULL) {
641 ret = -ENOENT;
642
643 /* interrupt source has some active callbacks right now. */
644 } else if (src->active != 0) {
645 ret = -EAGAIN;
646
647 /* ok to remove. */
648 } else {
649 ret = 0;
650
651 /* walk through the callbacks and remove all that match. */
652 for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
653
654 next = TAILQ_NEXT(cb, next);
655
656 if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
657 cb->cb_arg == cb_arg)) {
658 TAILQ_REMOVE(&src->callbacks, cb, next);
659 free(cb);
660 ret++;
661 }
662 }
663
664 /* all callbacks for that source are removed. */
665 if (TAILQ_EMPTY(&src->callbacks)) {
666 TAILQ_REMOVE(&intr_sources, src, next);
667 rte_intr_instance_free(src->intr_handle);
668 free(src);
669 }
670 }
671
672 rte_spinlock_unlock(&intr_lock);
673
674 /* notify the pipe fd waited by epoll_wait to rebuild the wait list */
675 if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) {
676 ret = -EPIPE;
677 }
678
679 rte_eal_trace_intr_callback_unregister(intr_handle, cb_fn, cb_arg,
680 ret);
681 return ret;
682 }
683
684 int
685 rte_intr_callback_unregister_sync(const struct rte_intr_handle *intr_handle,
686 rte_intr_callback_fn cb_fn, void *cb_arg)
687 {
688 int ret = 0;
689
690 while ((ret = rte_intr_callback_unregister(intr_handle, cb_fn, cb_arg)) == -EAGAIN)
691 rte_pause();
692
693 return ret;
694 }
695
696 int
697 rte_intr_enable(const struct rte_intr_handle *intr_handle)
698 {
699 int rc = 0, uio_cfg_fd;
700
701 if (intr_handle == NULL)
702 return -1;
703
704 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
705 rc = 0;
706 goto out;
707 }
708
709 uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
710 if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
711 rc = -1;
712 goto out;
713 }
714
715 switch (rte_intr_type_get(intr_handle)) {
716 /* write to the uio fd to enable the interrupt */
717 case RTE_INTR_HANDLE_UIO:
718 if (uio_intr_enable(intr_handle))
719 rc = -1;
720 break;
721 case RTE_INTR_HANDLE_UIO_INTX:
722 if (uio_intx_intr_enable(intr_handle))
723 rc = -1;
724 break;
725 /* not used at this moment */
726 case RTE_INTR_HANDLE_ALARM:
727 rc = -1;
728 break;
729 #ifdef VFIO_PRESENT
730 case RTE_INTR_HANDLE_VFIO_MSIX:
731 if (vfio_enable_msix(intr_handle))
732 rc = -1;
733 break;
734 case RTE_INTR_HANDLE_VFIO_MSI:
735 if (vfio_enable_msi(intr_handle))
736 rc = -1;
737 break;
738 case RTE_INTR_HANDLE_VFIO_LEGACY:
739 if (vfio_enable_intx(intr_handle))
740 rc = -1;
741 break;
742 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
743 case RTE_INTR_HANDLE_VFIO_REQ:
744 if (vfio_enable_req(intr_handle))
745 rc = -1;
746 break;
747 #endif
748 #endif
749 /* not used at this moment */
750 case RTE_INTR_HANDLE_DEV_EVENT:
751 rc = -1;
752 break;
753 /* unknown handle type */
754 default:
755 EAL_LOG(ERR, "Unknown handle type of fd %d",
756 rte_intr_fd_get(intr_handle));
757 rc = -1;
758 break;
759 }
760 out:
761 rte_eal_trace_intr_enable(intr_handle, rc);
762 return rc;
763 }
764
765 /**
766 * PMD generally calls this function at the end of its IRQ callback.
767 * Internally, it unmasks the interrupt if possible.
768 *
769 * For INTx, unmasking is required as the interrupt is auto-masked prior to
770 * invoking the callback.
771 *
772 * For MSI/MSI-X, unmasking is typically not needed as the interrupt is not
773 * auto-masked. In fact, for interrupt handle types VFIO_MSIX and VFIO_MSI,
774 * this function is a no-op.
775 */
776 int
777 rte_intr_ack(const struct rte_intr_handle *intr_handle)
778 {
779 int uio_cfg_fd;
780
781 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
782 return 0;
783
784 uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
785 if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0)
786 return -1;
787
788 switch (rte_intr_type_get(intr_handle)) {
789 /* Both acking and enabling are same for UIO */
790 case RTE_INTR_HANDLE_UIO:
791 if (uio_intr_enable(intr_handle))
792 return -1;
793 break;
794 case RTE_INTR_HANDLE_UIO_INTX:
795 if (uio_intx_intr_enable(intr_handle))
796 return -1;
797 break;
798 /* not used at this moment */
799 case RTE_INTR_HANDLE_ALARM:
800 return -1;
801 #ifdef VFIO_PRESENT
802 /* VFIO MSI* is implicitly acked unlike INTx, nothing to do */
803 case RTE_INTR_HANDLE_VFIO_MSIX:
804 case RTE_INTR_HANDLE_VFIO_MSI:
805 return 0;
806 case RTE_INTR_HANDLE_VFIO_LEGACY:
807 if (vfio_ack_intx(intr_handle))
808 return -1;
809 break;
810 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
811 case RTE_INTR_HANDLE_VFIO_REQ:
812 return -1;
813 #endif
814 #endif
815 /* not used at this moment */
816 case RTE_INTR_HANDLE_DEV_EVENT:
817 return -1;
818 /* unknown handle type */
819 default:
820 EAL_LOG(ERR, "Unknown handle type of fd %d",
821 rte_intr_fd_get(intr_handle));
822 return -1;
823 }
824
825 return 0;
826 }
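/*
 * Typical call-site sketch (illustrative; my_dev and handle_link_state_change
 * are hypothetical): an interrupt callback does its work first and acks last,
 * so a level-triggered INTx line is only unmasked once the cause is handled.
 *
 *	static void lsc_intr_cb(void *arg)
 *	{
 *		struct my_dev *dev = arg;
 *		handle_link_state_change(dev);
 *		rte_intr_ack(dev->intr_handle);	// no-op for VFIO MSI/MSI-X
 *	}
 */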
827
828 int
829 rte_intr_disable(const struct rte_intr_handle *intr_handle)
830 {
831 int rc = 0, uio_cfg_fd;
832
833 if (intr_handle == NULL)
834 return -1;
835
836 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
837 rc = 0;
838 goto out;
839 }
840
841 uio_cfg_fd = rte_intr_dev_fd_get(intr_handle);
842 if (rte_intr_fd_get(intr_handle) < 0 || uio_cfg_fd < 0) {
843 rc = -1;
844 goto out;
845 }
846
847 switch (rte_intr_type_get(intr_handle)) {
848 /* write to the uio fd to disable the interrupt */
849 case RTE_INTR_HANDLE_UIO:
850 if (uio_intr_disable(intr_handle))
851 rc = -1;
852 break;
853 case RTE_INTR_HANDLE_UIO_INTX:
854 if (uio_intx_intr_disable(intr_handle))
855 rc = -1;
856 break;
857 /* not used at this moment */
858 case RTE_INTR_HANDLE_ALARM:
859 rc = -1;
860 break;
861 #ifdef VFIO_PRESENT
862 case RTE_INTR_HANDLE_VFIO_MSIX:
863 if (vfio_disable_msix(intr_handle))
864 rc = -1;
865 break;
866 case RTE_INTR_HANDLE_VFIO_MSI:
867 if (vfio_disable_msi(intr_handle))
868 rc = -1;
869 break;
870 case RTE_INTR_HANDLE_VFIO_LEGACY:
871 if (vfio_disable_intx(intr_handle))
872 rc = -1;
873 break;
874 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
875 case RTE_INTR_HANDLE_VFIO_REQ:
876 if (vfio_disable_req(intr_handle))
877 rc = -1;
878 break;
879 #endif
880 #endif
881 /* not used at this moment */
882 case RTE_INTR_HANDLE_DEV_EVENT:
883 rc = -1;
884 break;
885 /* unknown handle type */
886 default:
887 EAL_LOG(ERR, "Unknown handle type of fd %d",
888 rte_intr_fd_get(intr_handle));
889 rc = -1;
890 break;
891 }
892 out:
893 rte_eal_trace_intr_disable(intr_handle, rc);
894 return rc;
895 }
896
897 static int
898 eal_intr_process_interrupts(struct epoll_event *events, int nfds)
899 {
900 bool call = false;
901 int n, bytes_read, rv;
902 struct rte_intr_source *src;
903 struct rte_intr_callback *cb, *next;
904 union rte_intr_read_buffer buf;
905 struct rte_intr_callback active_cb;
906
907 for (n = 0; n < nfds; n++) {
908
909 /**
910 * if the pipe fd is ready to read, return to the caller to
911 * rebuild the wait list.
912 */
913 if (events[n].data.fd == intr_pipe.readfd){
914 int r = read(intr_pipe.readfd, buf.charbuf,
915 sizeof(buf.charbuf));
916 RTE_SET_USED(r);
917 return -1;
918 }
919 rte_spinlock_lock(&intr_lock);
920 TAILQ_FOREACH(src, &intr_sources, next)
921 if (rte_intr_fd_get(src->intr_handle) == events[n].data.fd)
922 break;
923 if (src == NULL){
924 rte_spinlock_unlock(&intr_lock);
925 continue;
926 }
927
928 /* mark this interrupt source as active and release the lock. */
929 src->active = 1;
930 rte_spinlock_unlock(&intr_lock);
931
932 /* set the length to be read for different handle types */
933 switch (rte_intr_type_get(src->intr_handle)) {
934 case RTE_INTR_HANDLE_UIO:
935 case RTE_INTR_HANDLE_UIO_INTX:
936 bytes_read = sizeof(buf.uio_intr_count);
937 break;
938 case RTE_INTR_HANDLE_ALARM:
939 bytes_read = sizeof(buf.timerfd_num);
940 break;
941 #ifdef VFIO_PRESENT
942 #ifdef HAVE_VFIO_DEV_REQ_INTERFACE
943 case RTE_INTR_HANDLE_VFIO_REQ:
944 #endif
945 case RTE_INTR_HANDLE_VFIO_MSIX:
946 case RTE_INTR_HANDLE_VFIO_MSI:
947 case RTE_INTR_HANDLE_VFIO_LEGACY:
948 bytes_read = sizeof(buf.vfio_intr_count);
949 break;
950 #endif
951 case RTE_INTR_HANDLE_VDEV:
952 case RTE_INTR_HANDLE_EXT:
953 bytes_read = 0;
954 call = true;
955 break;
956 case RTE_INTR_HANDLE_DEV_EVENT:
957 bytes_read = 0;
958 call = true;
959 break;
960 default:
961 bytes_read = 1;
962 break;
963 }
964
965 if (bytes_read > 0) {
966 /**
967 * read out to clear the ready-to-be-read flag
968 * for epoll_wait.
969 */
970 bytes_read = read(events[n].data.fd, &buf, bytes_read);
971 if (bytes_read < 0) {
972 if (errno == EINTR || errno == EWOULDBLOCK)
973 continue;
974
975 EAL_LOG(ERR, "Error reading from file "
976 "descriptor %d: %s",
977 events[n].data.fd,
978 strerror(errno));
979 /*
980 * The device is unplugged or buggy, remove
981 * it as an interrupt source and return to
982 * force the wait list to be rebuilt.
983 */
984 rte_spinlock_lock(&intr_lock);
985 TAILQ_REMOVE(&intr_sources, src, next);
986 rte_spinlock_unlock(&intr_lock);
987
988 for (cb = TAILQ_FIRST(&src->callbacks); cb;
989 cb = next) {
990 next = TAILQ_NEXT(cb, next);
991 TAILQ_REMOVE(&src->callbacks, cb, next);
992 free(cb);
993 }
994 rte_intr_instance_free(src->intr_handle);
995 free(src);
996 return -1;
997 } else if (bytes_read == 0)
998 EAL_LOG(ERR, "Read nothing from file "
999 "descriptor %d", events[n].data.fd);
1000 else
1001 call = true;
1002 }
1003
1004 /* grab a lock, again to call callbacks and update status. */
1005 rte_spinlock_lock(&intr_lock);
1006
1007 if (call) {
1008
1009 /* Finally, call all callbacks. */
1010 TAILQ_FOREACH(cb, &src->callbacks, next) {
1011
1012 /* make a copy and unlock. */
1013 active_cb = *cb;
1014 rte_spinlock_unlock(&intr_lock);
1015
1016 /* call the actual callback */
1017 active_cb.cb_fn(active_cb.cb_arg);
1018
1019 /* get the lock back. */
1020 rte_spinlock_lock(&intr_lock);
1021 }
1022 }
1023 /* we are done with this interrupt source, release it. */
1024 src->active = 0;
1025
1026 rv = 0;
1027
1028 /* check if any callbacks are supposed to be removed */
1029 for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
1030 next = TAILQ_NEXT(cb, next);
1031 if (cb->pending_delete) {
1032 TAILQ_REMOVE(&src->callbacks, cb, next);
1033 if (cb->ucb_fn)
1034 cb->ucb_fn(src->intr_handle, cb->cb_arg);
1035 free(cb);
1036 rv++;
1037 }
1038 }
1039
1040 /* all callbacks for that source are removed. */
1041 if (TAILQ_EMPTY(&src->callbacks)) {
1042 TAILQ_REMOVE(&intr_sources, src, next);
1043 rte_intr_instance_free(src->intr_handle);
1044 free(src);
1045 }
1046
1047 /* notify the pipe fd waited by epoll_wait to rebuild the wait list */
1048 if (rv > 0 && write(intr_pipe.writefd, "1", 1) < 0) {
1049 rte_spinlock_unlock(&intr_lock);
1050 return -EPIPE;
1051 }
1052
1053 rte_spinlock_unlock(&intr_lock);
1054 }
1055
1056 return 0;
1057 }
1058
1059 /**
1060 * It handles all the interrupts.
1061 *
1062 * @param pfd
1063 * epoll file descriptor.
1064 * @param totalfds
1065 * The number of file descriptors added in epoll.
1066 *
1067 * @return
1068 * void
1069 */
1070 static void
1071 eal_intr_handle_interrupts(int pfd, unsigned totalfds)
1072 {
1073 struct epoll_event events[totalfds];
1074 int nfds = 0;
1075
1076 for(;;) {
1077 nfds = epoll_wait(pfd, events, totalfds,
1078 EAL_INTR_EPOLL_WAIT_FOREVER);
1079 /* epoll_wait fail */
1080 if (nfds < 0) {
1081 if (errno == EINTR)
1082 continue;
1083 EAL_LOG(ERR,
1084 "epoll_wait returns with fail");
1085 return;
1086 }
1087 /* epoll_wait timeout, will never happen here */
1088 else if (nfds == 0)
1089 continue;
1090 /* epoll_wait has at least one fd ready to read */
1091 if (eal_intr_process_interrupts(events, nfds) < 0)
1092 return;
1093 }
1094 }
1095
1096 /**
1097 * It builds/rebuilds up the epoll file descriptor with all the
1098 * file descriptors being waited on. Then handles the interrupts.
1099 *
1100 * @param arg
1101 * pointer. (unused)
1102 *
1103 * @return
1104 * never return;
1105 */
1106 static __rte_noreturn uint32_t
1107 eal_intr_thread_main(__rte_unused void *arg)
1108 {
1109 /* host thread, never break out */
1110 for (;;) {
1111 /* build up the epoll fd with all descriptors we are to
1112 * wait on then pass it to the handle_interrupts function
1113 */
1114 static struct epoll_event pipe_event = {
1115 .events = EPOLLIN | EPOLLPRI,
1116 };
1117 struct rte_intr_source *src;
1118 unsigned numfds = 0;
1119
1120 /* create epoll fd */
1121 int pfd = epoll_create(1);
1122 if (pfd < 0)
1123 rte_panic("Cannot create epoll instance\n");
1124
1125 pipe_event.data.fd = intr_pipe.readfd;
1126 /**
1127 * add pipe fd into wait list, this pipe is used to
1128 * rebuild the wait list.
1129 */
1130 if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd,
1131 &pipe_event) < 0) {
1132 rte_panic("Error adding fd to %d epoll_ctl, %s\n",
1133 intr_pipe.readfd, strerror(errno));
1134 }
1135 numfds++;
1136
1137 rte_spinlock_lock(&intr_lock);
1138
1139 TAILQ_FOREACH(src, &intr_sources, next) {
1140 struct epoll_event ev;
1141
1142 if (src->callbacks.tqh_first == NULL)
1143 continue; /* skip those with no callbacks */
1144 memset(&ev, 0, sizeof(ev));
1145 ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
1146 ev.data.fd = rte_intr_fd_get(src->intr_handle);
1147
1148 /**
1149 * add each interrupt source's file descriptor
1150 * into the wait list.
1151 */
1152 if (epoll_ctl(pfd, EPOLL_CTL_ADD,
1153 rte_intr_fd_get(src->intr_handle), &ev) < 0) {
1154 rte_panic("Error adding fd %d epoll_ctl, %s\n",
1155 rte_intr_fd_get(src->intr_handle),
1156 strerror(errno));
1157 }
1158 else
1159 numfds++;
1160 }
1161 rte_spinlock_unlock(&intr_lock);
1162 /* serve the interrupt */
1163 eal_intr_handle_interrupts(pfd, numfds);
1164
1165 /**
1166 * when we return, we need to rebuild the
1167 * list of fds to monitor.
1168 */
1169 close(pfd);
1170 }
1171 }
1172
1173 int
1174 rte_eal_intr_init(void)
1175 {
1176 int ret = 0;
1177
1178 /* init the global interrupt source head */
1179 TAILQ_INIT(&intr_sources);
1180
1181 /**
1182 * create a pipe that epoll waits on; a write to it notifies the
1183 * interrupt thread to rebuild its epoll wait list.
1184 */
1185 if (pipe(intr_pipe.pipefd) < 0) {
1186 rte_errno = errno;
1187 return -1;
1188 }
1189
1190 /* create the host thread to wait/handle the interrupt */
1191 ret = rte_thread_create_internal_control(&intr_thread, "intr",
1192 eal_intr_thread_main, NULL);
1193 if (ret != 0) {
1194 rte_errno = -ret;
1195 EAL_LOG(ERR,
1196 "Failed to create thread for interrupt handling");
1197 }
1198
1199 return ret;
1200 }
1201
1202 static void
1203 eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
1204 {
1205 union rte_intr_read_buffer buf;
1206 int bytes_read = 0;
1207 int nbytes;
1208
1209 switch (rte_intr_type_get(intr_handle)) {
1210 case RTE_INTR_HANDLE_UIO:
1211 case RTE_INTR_HANDLE_UIO_INTX:
1212 bytes_read = sizeof(buf.uio_intr_count);
1213 break;
1214 #ifdef VFIO_PRESENT
1215 case RTE_INTR_HANDLE_VFIO_MSIX:
1216 case RTE_INTR_HANDLE_VFIO_MSI:
1217 case RTE_INTR_HANDLE_VFIO_LEGACY:
1218 bytes_read = sizeof(buf.vfio_intr_count);
1219 break;
1220 #endif
1221 case RTE_INTR_HANDLE_VDEV:
1222 bytes_read = rte_intr_efd_counter_size_get(intr_handle);
1223 /* For vdev, number of bytes to read is set by driver */
1224 break;
1225 case RTE_INTR_HANDLE_EXT:
1226 return;
1227 default:
1228 bytes_read = 1;
1229 EAL_LOG(INFO, "unexpected intr type");
1230 break;
1231 }
1232
1233 /**
1234 * read out to clear the ready-to-be-read flag
1235 * for epoll_wait.
1236 */
1237 if (bytes_read == 0)
1238 return;
1239 do {
1240 nbytes = read(fd, &buf, bytes_read);
1241 if (nbytes < 0) {
1242 if (errno == EINTR || errno == EWOULDBLOCK ||
1243 errno == EAGAIN)
1244 continue;
1245 EAL_LOG(ERR,
1246 "Error reading from fd %d: %s",
1247 fd, strerror(errno));
1248 } else if (nbytes == 0)
1249 EAL_LOG(ERR, "Read nothing from fd %d", fd);
1250 return;
1251 } while (1);
1252 }
1253
1254 static int
1255 eal_epoll_process_event(struct epoll_event *evs, unsigned int n,
1256 struct rte_epoll_event *events)
1257 {
1258 unsigned int i, count = 0;
1259 struct rte_epoll_event *rev;
1260 uint32_t valid_status;
1261
1262 for (i = 0; i < n; i++) {
1263 rev = evs[i].data.ptr;
1264 valid_status = RTE_EPOLL_VALID;
1265 /* ACQUIRE memory ordering here pairs with RELEASE
1266 * ordering below acting as a lock to synchronize
1267 * the event data updating.
1268 */
1269 if (!rev || !rte_atomic_compare_exchange_strong_explicit(&rev->status,
1270 &valid_status, RTE_EPOLL_EXEC,
1271 rte_memory_order_acquire, rte_memory_order_relaxed))
1272 continue;
1273
1274 events[count].status = RTE_EPOLL_VALID;
1275 events[count].fd = rev->fd;
1276 events[count].epfd = rev->epfd;
1277 events[count].epdata.event = evs[i].events;
1278 events[count].epdata.data = rev->epdata.data;
1279 if (rev->epdata.cb_fun)
1280 rev->epdata.cb_fun(rev->fd,
1281 rev->epdata.cb_arg);
1282
1283 /* the status update should be observed after
1284 * the other fields change.
1285 */
1286 rte_atomic_store_explicit(&rev->status, RTE_EPOLL_VALID,
1287 rte_memory_order_release);
1288 count++;
1289 }
1290 return count;
1291 }
1292
1293 static inline int
1294 eal_init_tls_epfd(void)
1295 {
1296 int pfd = epoll_create(255);
1297
1298 if (pfd < 0) {
1299 EAL_LOG(ERR,
1300 "Cannot create epoll instance");
1301 return -1;
1302 }
1303 return pfd;
1304 }
1305
1306 int
1307 rte_intr_tls_epfd(void)
1308 {
1309 if (RTE_PER_LCORE(_epfd) == -1)
1310 RTE_PER_LCORE(_epfd) = eal_init_tls_epfd();
1311
1312 return RTE_PER_LCORE(_epfd);
1313 }
1314
1315 static int
1316 eal_epoll_wait(int epfd, struct rte_epoll_event *events,
1317 int maxevents, int timeout, bool interruptible)
1318 {
1319 struct epoll_event evs[maxevents];
1320 int rc;
1321
1322 if (!events) {
1323 EAL_LOG(ERR, "rte_epoll_event can't be NULL");
1324 return -1;
1325 }
1326
1327 /* using per thread epoll fd */
1328 if (epfd == RTE_EPOLL_PER_THREAD)
1329 epfd = rte_intr_tls_epfd();
1330
1331 while (1) {
1332 rc = epoll_wait(epfd, evs, maxevents, timeout);
1333 if (likely(rc > 0)) {
1334 /* epoll_wait has at least one fd ready to read */
1335 rc = eal_epoll_process_event(evs, rc, events);
1336 break;
1337 } else if (rc < 0) {
1338 if (errno == EINTR) {
1339 if (interruptible)
1340 return -1;
1341 else
1342 continue;
1343 }
1344 /* epoll_wait fail */
1345 EAL_LOG(ERR, "epoll_wait returns with fail %s",
1346 strerror(errno));
1347 rc = -1;
1348 break;
1349 } else {
1350 /* rc == 0, epoll_wait timed out */
1351 break;
1352 }
1353 }
1354
1355 return rc;
1356 }
1357
1358 int
1359 rte_epoll_wait(int epfd, struct rte_epoll_event *events,
1360 int maxevents, int timeout)
1361 {
1362 return eal_epoll_wait(epfd, events, maxevents, timeout, false);
1363 }
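/*
 * Minimal wait-loop sketch (illustrative, assuming the events were added
 * beforehand with rte_epoll_ctl() or rte_intr_rx_ctl(); process() is a
 * hypothetical helper):
 *
 *	struct rte_epoll_event ev[8];
 *	int i, n;
 *
 *	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, ev, 8, -1);
 *	for (i = 0; i < n; i++)
 *		process(ev[i].epdata.data);
 */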
1364
1365 int
1366 rte_epoll_wait_interruptible(int epfd, struct rte_epoll_event *events,
1367 int maxevents, int timeout)
1368 {
1369 return eal_epoll_wait(epfd, events, maxevents, timeout, true);
1370 }
1371
1372 static inline void
1373 eal_epoll_data_safe_free(struct rte_epoll_event *ev)
1374 {
1375 uint32_t valid_status = RTE_EPOLL_VALID;
1376
1377 while (!rte_atomic_compare_exchange_strong_explicit(&ev->status, &valid_status,
1378 RTE_EPOLL_INVALID, rte_memory_order_acquire, rte_memory_order_relaxed)) {
1379 while (rte_atomic_load_explicit(&ev->status,
1380 rte_memory_order_relaxed) != RTE_EPOLL_VALID)
1381 rte_pause();
1382 valid_status = RTE_EPOLL_VALID;
1383 }
1384 memset(&ev->epdata, 0, sizeof(ev->epdata));
1385 ev->fd = -1;
1386 ev->epfd = -1;
1387 }
1388
1389 int
1390 rte_epoll_ctl(int epfd, int op, int fd,
1391 struct rte_epoll_event *event)
1392 {
1393 struct epoll_event ev;
1394
1395 if (!event) {
1396 EAL_LOG(ERR, "rte_epoll_event can't be NULL");
1397 return -1;
1398 }
1399
1400 /* using per thread epoll fd */
1401 if (epfd == RTE_EPOLL_PER_THREAD)
1402 epfd = rte_intr_tls_epfd();
1403
1404 if (op == EPOLL_CTL_ADD) {
1405 rte_atomic_store_explicit(&event->status, RTE_EPOLL_VALID,
1406 rte_memory_order_relaxed);
1407 event->fd = fd; /* ignore fd in event */
1408 event->epfd = epfd;
1409 ev.data.ptr = (void *)event;
1410 }
1411
1412 ev.events = event->epdata.event;
1413 if (epoll_ctl(epfd, op, fd, &ev) < 0) {
1414 EAL_LOG(ERR, "Error op %d fd %d epoll_ctl, %s",
1415 op, fd, strerror(errno));
1416 if (op == EPOLL_CTL_ADD)
1417 /* rollback status when CTL_ADD fail */
1418 rte_atomic_store_explicit(&event->status, RTE_EPOLL_INVALID,
1419 rte_memory_order_relaxed);
1420 return -1;
1421 }
1422
1423 if (op == EPOLL_CTL_DEL && rte_atomic_load_explicit(&event->status,
1424 rte_memory_order_relaxed) != RTE_EPOLL_INVALID)
1425 eal_epoll_data_safe_free(event);
1426
1427 return 0;
1428 }
1429
1430 int
1431 rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
1432 int op, unsigned int vec, void *data)
1433 {
1434 struct rte_epoll_event *rev;
1435 struct rte_epoll_data *epdata;
1436 int epfd_op;
1437 unsigned int efd_idx;
1438 int rc = 0;
1439
1440 efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
1441 (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
1442
1443 if (intr_handle == NULL || rte_intr_nb_efd_get(intr_handle) == 0 ||
1444 efd_idx >= (unsigned int)rte_intr_nb_efd_get(intr_handle)) {
1445 EAL_LOG(ERR, "Wrong intr vector number.");
1446 return -EPERM;
1447 }
1448
1449 switch (op) {
1450 case RTE_INTR_EVENT_ADD:
1451 epfd_op = EPOLL_CTL_ADD;
1452 rev = rte_intr_elist_index_get(intr_handle, efd_idx);
1453 if (rte_atomic_load_explicit(&rev->status,
1454 rte_memory_order_relaxed) != RTE_EPOLL_INVALID) {
1455 EAL_LOG(INFO, "Event already been added.");
1456 return -EEXIST;
1457 }
1458
1459 /* attach to intr vector fd */
1460 epdata = &rev->epdata;
1461 epdata->event = EPOLLIN | EPOLLPRI | EPOLLET;
1462 epdata->data = data;
1463 epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr;
1464 epdata->cb_arg = (void *)intr_handle;
1465 rc = rte_epoll_ctl(epfd, epfd_op,
1466 rte_intr_efds_index_get(intr_handle, efd_idx), rev);
1467 if (!rc)
1468 EAL_LOG(DEBUG,
1469 "efd %d associated with vec %d added on epfd %d",
1470 rev->fd, vec, epfd);
1471 else
1472 rc = -EPERM;
1473 break;
1474 case RTE_INTR_EVENT_DEL:
1475 epfd_op = EPOLL_CTL_DEL;
1476 rev = rte_intr_elist_index_get(intr_handle, efd_idx);
1477 if (rte_atomic_load_explicit(&rev->status,
1478 rte_memory_order_relaxed) == RTE_EPOLL_INVALID) {
1479 EAL_LOG(INFO, "Event does not exist.");
1480 return -EPERM;
1481 }
1482
1483 rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev);
1484 if (rc)
1485 rc = -EPERM;
1486 break;
1487 default:
1488 EAL_LOG(ERR, "event op type mismatch");
1489 rc = -EPERM;
1490 }
1491
1492 return rc;
1493 }
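/*
 * Per-queue sketch (illustrative): once rte_intr_efd_enable() has populated
 * the efds array, each Rx queue vector can be attached to an epoll fd; ethdev
 * applications normally reach this through rte_eth_dev_rx_intr_ctl_q().
 * nb_rx_queues below is a hypothetical count.
 *
 *	uint16_t q;
 *
 *	for (q = 0; q < nb_rx_queues; q++)
 *		rte_intr_rx_ctl(intr_handle, RTE_EPOLL_PER_THREAD,
 *				RTE_INTR_EVENT_ADD,
 *				q + RTE_INTR_VEC_RXTX_OFFSET,
 *				(void *)(uintptr_t)q);
 */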
1494
1495 void
1496 rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle)
1497 {
1498 uint32_t i;
1499 struct rte_epoll_event *rev;
1500
1501 for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++) {
1502 rev = rte_intr_elist_index_get(intr_handle, i);
1503 if (rte_atomic_load_explicit(&rev->status,
1504 rte_memory_order_relaxed) == RTE_EPOLL_INVALID)
1505 continue;
1506 if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
1507 /* force free if the entry is valid */
1508 eal_epoll_data_safe_free(rev);
1509 }
1510 }
1511 }
1512
1513 int
1514 rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
1515 {
1516 uint32_t i;
1517 int fd;
1518 uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
1519
1520 assert(nb_efd != 0);
1521
1522 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX) {
1523 for (i = 0; i < n; i++) {
1524 fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1525 if (fd < 0) {
1526 EAL_LOG(ERR,
1527 "can't setup eventfd, error %i (%s)",
1528 errno, strerror(errno));
1529 return -errno;
1530 }
1531
1532 if (rte_intr_efds_index_set(intr_handle, i, fd))
1533 return -rte_errno;
1534 }
1535
1536 if (rte_intr_nb_efd_set(intr_handle, n))
1537 return -rte_errno;
1538
1539 if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR + n))
1540 return -rte_errno;
1541 } else if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV) {
1542 /* only check; initialization is done in the vdev driver. */
1543 if ((uint64_t)rte_intr_efd_counter_size_get(intr_handle) >
1544 sizeof(union rte_intr_read_buffer)) {
1545 EAL_LOG(ERR, "the efd_counter_size is oversized");
1546 return -EINVAL;
1547 }
1548 } else {
1549 if (rte_intr_efds_index_set(intr_handle, 0, rte_intr_fd_get(intr_handle)))
1550 return -rte_errno;
1551 if (rte_intr_nb_efd_set(intr_handle, RTE_MIN(nb_efd, 1U)))
1552 return -rte_errno;
1553 if (rte_intr_max_intr_set(intr_handle, NB_OTHER_INTR))
1554 return -rte_errno;
1555 }
1556
1557 return 0;
1558 }
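/*
 * Setup-order sketch (illustrative; dev is a hypothetical driver structure):
 * a PMD that supports Rx interrupts typically sizes the eventfds to the
 * number of data-path queues in its start path, before the vectors are mapped
 * to MSI-X via rte_intr_enable().
 *
 *	if (rte_intr_efd_enable(intr_handle, dev->nb_rx_queues) < 0)
 *		return -1;
 *	if (rte_intr_dp_is_en(intr_handle))
 *		// allocate the per-queue vector mapping, then rte_intr_enable()
 */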
1559
1560 void
1561 rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
1562 {
1563 uint32_t i;
1564
1565 rte_intr_free_epoll_fd(intr_handle);
1566 if (rte_intr_max_intr_get(intr_handle) > rte_intr_nb_efd_get(intr_handle)) {
1567 for (i = 0; i < (uint32_t)rte_intr_nb_efd_get(intr_handle); i++)
1568 close(rte_intr_efds_index_get(intr_handle, i));
1569 }
1570 rte_intr_nb_efd_set(intr_handle, 0);
1571 rte_intr_max_intr_set(intr_handle, 0);
1572 }
1573
1574 int
1575 rte_intr_dp_is_en(struct rte_intr_handle *intr_handle)
1576 {
1577 return !(!rte_intr_nb_efd_get(intr_handle));
1578 }
1579
1580 int
1581 rte_intr_allow_others(struct rte_intr_handle *intr_handle)
1582 {
1583 if (!rte_intr_dp_is_en(intr_handle))
1584 return 1;
1585 else
1586 return !!(rte_intr_max_intr_get(intr_handle) -
1587 rte_intr_nb_efd_get(intr_handle));
1588 }
1589
1590 int
1591 rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
1592 {
1593 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VFIO_MSIX)
1594 return 1;
1595
1596 if (rte_intr_type_get(intr_handle) == RTE_INTR_HANDLE_VDEV)
1597 return 1;
1598
1599 return 0;
1600 }
1601
1602 int rte_thread_is_intr(void)
1603 {
1604 return rte_thread_equal(intr_thread, rte_thread_self());
1605 }
1606