1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
3 */
4
5 #include <errno.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <sys/epoll.h>
9 #include <unistd.h>
10
11 #include <rte_common.h>
12 #include <rte_log.h>
13 #include <rte_malloc.h>
14 #include <rte_string_fns.h>
15 #include <rte_thread.h>
16
17 #include "fd_man.h"
18
/* Log type used by the fdset code; registered with the "fdset" suffix and INFO. */
RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
#define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype
/*
 * Log one line with the VHOST_FDMAN log type. The empty string literal in
 * front of __VA_ARGS__ is concatenated with the format argument, so the
 * format has to be a string literal.
 */
#define VHOST_FDMAN_LOG(level, ...) \
	RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)
23
24 struct fdentry {
25 int fd; /* -1 indicates this entry is empty */
26 fd_cb rcb; /* callback when this fd is readable. */
27 fd_cb wcb; /* callback when this fd is writeable.*/
28 void *dat; /* fd context */
29 int busy; /* whether this entry is being used in cb. */
30 LIST_ENTRY(fdentry) next;
31 };
32
33 struct fdset {
34 char name[RTE_THREAD_NAME_SIZE];
35 int epfd;
36 struct fdentry fd[MAX_FDS];
37 LIST_HEAD(, fdentry) fdlist;
38 int next_free_idx;
39 rte_thread_t tid;
40 pthread_mutex_t fd_mutex;
41 bool destroy;
42 };
43
/* Capacity of the global fdset table below. */
#define MAX_FDSETS 8

/* Table of the fdsets created so far; unused slots are NULL. */
static struct fdset *fdsets[MAX_FDSETS];
/* Held by fdset_init() while it looks up and inserts into fdsets[]. */
static pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Declared ahead of its definition because fdset_init() starts it as a thread. */
static uint32_t fdset_event_dispatch(void *arg);
50
51 static struct fdset *
fdset_lookup(const char * name)52 fdset_lookup(const char *name)
53 {
54 int i;
55
56 for (i = 0; i < MAX_FDSETS; i++) {
57 struct fdset *fdset = fdsets[i];
58 if (fdset == NULL)
59 continue;
60
61 if (!strncmp(fdset->name, name, RTE_THREAD_NAME_SIZE))
62 return fdset;
63 }
64
65 return NULL;
66 }
67
68 static int
fdset_insert(struct fdset * fdset)69 fdset_insert(struct fdset *fdset)
70 {
71 int i;
72
73 for (i = 0; i < MAX_FDSETS; i++) {
74 if (fdsets[i] == NULL) {
75 fdsets[i] = fdset;
76 return 0;
77 }
78 }
79
80 return -1;
81 }
82
83 struct fdset *
fdset_init(const char * name)84 fdset_init(const char *name)
85 {
86 struct fdset *fdset;
87 uint32_t val;
88 int i;
89
90 pthread_mutex_lock(&fdsets_mutex);
91 fdset = fdset_lookup(name);
92 if (fdset) {
93 pthread_mutex_unlock(&fdsets_mutex);
94 return fdset;
95 }
96
97 fdset = rte_zmalloc(NULL, sizeof(*fdset), 0);
98 if (!fdset) {
99 VHOST_FDMAN_LOG(ERR, "failed to alloc fdset %s", name);
100 goto err_unlock;
101 }
102
103 rte_strscpy(fdset->name, name, RTE_THREAD_NAME_SIZE);
104
105 pthread_mutex_init(&fdset->fd_mutex, NULL);
106
107 for (i = 0; i < (int)RTE_DIM(fdset->fd); i++) {
108 fdset->fd[i].fd = -1;
109 fdset->fd[i].dat = NULL;
110 }
111 LIST_INIT(&fdset->fdlist);
112
113 /*
114 * Any non-zero value would work (see man epoll_create),
115 * but pass MAX_FDS for consistency.
116 */
117 fdset->epfd = epoll_create(MAX_FDS);
118 if (fdset->epfd < 0) {
119 VHOST_FDMAN_LOG(ERR, "failed to create epoll for %s fdset", name);
120 goto err_free;
121 }
122
123 if (rte_thread_create_internal_control(&fdset->tid, fdset->name,
124 fdset_event_dispatch, fdset)) {
125 VHOST_FDMAN_LOG(ERR, "Failed to create %s event dispatch thread",
126 fdset->name);
127 goto err_epoll;
128 }
129
130 if (fdset_insert(fdset)) {
131 VHOST_FDMAN_LOG(ERR, "Failed to insert fdset %s", name);
132 goto err_thread;
133 }
134
135 pthread_mutex_unlock(&fdsets_mutex);
136
137 return fdset;
138
139 err_thread:
140 fdset->destroy = true;
141 rte_thread_join(fdset->tid, &val);
142 err_epoll:
143 close(fdset->epfd);
144 err_free:
145 rte_free(fdset);
146 err_unlock:
147 pthread_mutex_unlock(&fdsets_mutex);
148
149 return NULL;
150 }
151
152 static int
fdset_insert_entry(struct fdset * pfdset,int fd,fd_cb rcb,fd_cb wcb,void * dat)153 fdset_insert_entry(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
154 {
155 struct fdentry *pfdentry;
156
157 if (pfdset->next_free_idx >= (int)RTE_DIM(pfdset->fd))
158 return -1;
159
160 pfdentry = &pfdset->fd[pfdset->next_free_idx];
161 pfdentry->fd = fd;
162 pfdentry->rcb = rcb;
163 pfdentry->wcb = wcb;
164 pfdentry->dat = dat;
165
166 LIST_INSERT_HEAD(&pfdset->fdlist, pfdentry, next);
167
168 /* Find next free slot */
169 pfdset->next_free_idx++;
170 for (; pfdset->next_free_idx < (int)RTE_DIM(pfdset->fd); pfdset->next_free_idx++) {
171 if (pfdset->fd[pfdset->next_free_idx].fd != -1)
172 continue;
173 break;
174 }
175
176 return 0;
177 }
178
179 static void
fdset_remove_entry(struct fdset * pfdset,struct fdentry * pfdentry)180 fdset_remove_entry(struct fdset *pfdset, struct fdentry *pfdentry)
181 {
182 int entry_idx;
183
184 pfdentry->fd = -1;
185 pfdentry->rcb = pfdentry->wcb = NULL;
186 pfdentry->dat = NULL;
187
188 entry_idx = pfdentry - pfdset->fd;
189 if (entry_idx < pfdset->next_free_idx)
190 pfdset->next_free_idx = entry_idx;
191
192 LIST_REMOVE(pfdentry, next);
193 }
194
195 static struct fdentry *
fdset_find_entry_locked(struct fdset * pfdset,int fd)196 fdset_find_entry_locked(struct fdset *pfdset, int fd)
197 {
198 struct fdentry *pfdentry;
199
200 LIST_FOREACH(pfdentry, &pfdset->fdlist, next) {
201 if (pfdentry->fd != fd)
202 continue;
203 return pfdentry;
204 }
205
206 return NULL;
207 }
208
209 /**
210 * Register the fd in the fdset with read/write handler and context.
211 */
212 int
fdset_add(struct fdset * pfdset,int fd,fd_cb rcb,fd_cb wcb,void * dat)213 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
214 {
215 struct epoll_event ev;
216 struct fdentry *pfdentry;
217 int ret = 0;
218
219 if (pfdset == NULL || fd == -1) {
220 ret = -1;
221 goto out;
222 }
223
224 pthread_mutex_lock(&pfdset->fd_mutex);
225 ret = fdset_insert_entry(pfdset, fd, rcb, wcb, dat);
226 if (ret < 0) {
227 VHOST_FDMAN_LOG(ERR, "failed to insert fdset entry");
228 pthread_mutex_unlock(&pfdset->fd_mutex);
229 goto out;
230 }
231 pthread_mutex_unlock(&pfdset->fd_mutex);
232
233 ev.events = EPOLLERR;
234 ev.events |= rcb ? EPOLLIN : 0;
235 ev.events |= wcb ? EPOLLOUT : 0;
236 ev.data.fd = fd;
237
238 ret = epoll_ctl(pfdset->epfd, EPOLL_CTL_ADD, fd, &ev);
239 if (ret < 0) {
240 VHOST_FDMAN_LOG(ERR, "could not add %d fd to %d epfd: %s",
241 fd, pfdset->epfd, strerror(errno));
242 goto out_remove;
243 }
244
245 return 0;
246 out_remove:
247 pthread_mutex_lock(&pfdset->fd_mutex);
248 pfdentry = fdset_find_entry_locked(pfdset, fd);
249 if (pfdentry)
250 fdset_remove_entry(pfdset, pfdentry);
251 pthread_mutex_unlock(&pfdset->fd_mutex);
252 out:
253 return ret;
254 }
255
256 static void
fdset_del_locked(struct fdset * pfdset,struct fdentry * pfdentry)257 fdset_del_locked(struct fdset *pfdset, struct fdentry *pfdentry)
258 {
259 if (epoll_ctl(pfdset->epfd, EPOLL_CTL_DEL, pfdentry->fd, NULL) == -1) {
260 if (errno == EBADF) /* File might have already been closed. */
261 VHOST_FDMAN_LOG(DEBUG, "could not remove %d fd from %d epfd: %s",
262 pfdentry->fd, pfdset->epfd, strerror(errno));
263 else
264 VHOST_FDMAN_LOG(ERR, "could not remove %d fd from %d epfd: %s",
265 pfdentry->fd, pfdset->epfd, strerror(errno));
266 }
267
268 fdset_remove_entry(pfdset, pfdentry);
269 }
270
271 void
fdset_del(struct fdset * pfdset,int fd)272 fdset_del(struct fdset *pfdset, int fd)
273 {
274 struct fdentry *pfdentry;
275
276 if (pfdset == NULL || fd == -1)
277 return;
278
279 do {
280 pthread_mutex_lock(&pfdset->fd_mutex);
281 pfdentry = fdset_find_entry_locked(pfdset, fd);
282 if (pfdentry != NULL && pfdentry->busy == 0) {
283 fdset_del_locked(pfdset, pfdentry);
284 pfdentry = NULL;
285 }
286 pthread_mutex_unlock(&pfdset->fd_mutex);
287 } while (pfdentry != NULL);
288 }
289
290 /**
291 * Unregister the fd from the fdset.
292 *
293 * If parameters are invalid, return directly -2.
294 * And check whether fd is busy, if yes, return -1.
295 * Otherwise, try to delete the fd from fdset and
296 * return true.
297 */
298 int
fdset_try_del(struct fdset * pfdset,int fd)299 fdset_try_del(struct fdset *pfdset, int fd)
300 {
301 struct fdentry *pfdentry;
302
303 if (pfdset == NULL || fd == -1)
304 return -2;
305
306 pthread_mutex_lock(&pfdset->fd_mutex);
307 pfdentry = fdset_find_entry_locked(pfdset, fd);
308 if (pfdentry != NULL && pfdentry->busy != 0) {
309 pthread_mutex_unlock(&pfdset->fd_mutex);
310 return -1;
311 }
312
313 if (pfdentry != NULL)
314 fdset_del_locked(pfdset, pfdentry);
315
316 pthread_mutex_unlock(&pfdset->fd_mutex);
317 return 0;
318 }
319
320 /**
321 * This functions runs in infinite blocking loop until there is no fd in
322 * pfdset. It calls corresponding r/w handler if there is event on the fd.
323 *
324 * Before the callback is called, we set the flag to busy status; If other
325 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
326 * will wait until the flag is reset to zero(which indicates the callback is
327 * finished), then it could free the context after fdset_del.
328 */
329 static uint32_t
fdset_event_dispatch(void * arg)330 fdset_event_dispatch(void *arg)
331 {
332 int i;
333 fd_cb rcb, wcb;
334 void *dat;
335 int fd, numfds;
336 int remove1, remove2;
337 struct fdset *pfdset = arg;
338
339 if (pfdset == NULL)
340 return 0;
341
342 while (1) {
343 struct epoll_event events[MAX_FDS];
344 struct fdentry *pfdentry;
345
346 numfds = epoll_wait(pfdset->epfd, events, RTE_DIM(events), 1000);
347 if (numfds < 0)
348 continue;
349
350 for (i = 0; i < numfds; i++) {
351 pthread_mutex_lock(&pfdset->fd_mutex);
352
353 fd = events[i].data.fd;
354 pfdentry = fdset_find_entry_locked(pfdset, fd);
355 if (pfdentry == NULL) {
356 pthread_mutex_unlock(&pfdset->fd_mutex);
357 continue;
358 }
359
360 remove1 = remove2 = 0;
361
362 rcb = pfdentry->rcb;
363 wcb = pfdentry->wcb;
364 dat = pfdentry->dat;
365 pfdentry->busy = 1;
366
367 pthread_mutex_unlock(&pfdset->fd_mutex);
368
369 if (rcb && events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP))
370 rcb(fd, dat, &remove1);
371 if (wcb && events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP))
372 wcb(fd, dat, &remove2);
373 pfdentry->busy = 0;
374 /*
375 * fdset_del needs to check busy flag.
376 * We don't allow fdset_del to be called in callback
377 * directly.
378 */
379 /*
380 * A concurrent fdset_del may have been waiting for the
381 * fdentry not to be busy, so we can't call
382 * fdset_del_locked().
383 */
384 if (remove1 || remove2)
385 fdset_del(pfdset, fd);
386 }
387
388 if (pfdset->destroy)
389 break;
390 }
391
392 return 0;
393 }
394