xref: /dpdk/lib/vhost/fd_man.c (revision 1e472b5746aeb6189fa254ab82ce4cd27999f868)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <errno.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <sys/epoll.h>
9 #include <unistd.h>
10 
11 #include <rte_common.h>
12 #include <rte_log.h>
13 #include <rte_malloc.h>
14 #include <rte_string_fns.h>
15 #include <rte_thread.h>
16 
17 #include "fd_man.h"
18 
/* Register the log type used by this file; INFO is passed as its level argument. */
RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
#define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype
/*
 * Emit one log line at the given level through the VHOST_FDMAN log type.
 * The leading "" is pasted onto the first variadic argument, so the format
 * has to be a string literal.
 */
#define VHOST_FDMAN_LOG(level, ...) \
	RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)
23 
/*
 * One file descriptor slot: the descriptor, its read/write callbacks and the
 * context handed to them. Slots are stored in the fd[] array of an fdset and
 * are linked into its fdlist while they hold a registered descriptor.
 */
struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable.*/
	void *dat;	/* fd context */
	int busy;	/* whether this entry is being used in cb. */
	LIST_ENTRY(fdentry) next;	/* linkage in the owning fdset's fdlist */
};
32 
/*
 * A named set of file descriptors served by one epoll instance and one
 * event dispatch thread.
 */
struct fdset {
	/* Name compared by fdset_lookup(); also given to the dispatch thread. */
	char name[RTE_THREAD_NAME_SIZE];
	/* epoll instance created in fdset_init(). */
	int epfd;
	/* Entry storage; a slot whose fd is -1 is free. */
	struct fdentry fd[MAX_FDS];
	/* Entries that currently hold a registered descriptor. */
	LIST_HEAD(, fdentry) fdlist;
	/* Index in fd[] used by the next insertion; >= RTE_DIM(fd) when full. */
	int next_free_idx;
	/* Event dispatch thread started in fdset_init(). */
	rte_thread_t tid;
	/* Taken around lookups, insertions and removals in fd[] / fdlist. */
	pthread_mutex_t fd_mutex;
	/* Set to make the dispatch thread leave its loop. */
	bool destroy;
};
43 
/* Number of slots in the fdsets[] table. */
#define MAX_FDSETS 8

/* Table of created fdsets; a NULL slot is free. */
static struct fdset *fdsets[MAX_FDSETS];
/* Held by fdset_init() around its lookup in, and insertion into, fdsets[]. */
static pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Routine run by each fdset's event dispatch thread (defined further down). */
static uint32_t fdset_event_dispatch(void *arg);
50 
51 static struct fdset *
52 fdset_lookup(const char *name)
53 {
54 	int i;
55 
56 	for (i = 0; i < MAX_FDSETS; i++) {
57 		struct fdset *fdset = fdsets[i];
58 		if (fdset == NULL)
59 			continue;
60 
61 		if (!strncmp(fdset->name, name, RTE_THREAD_NAME_SIZE))
62 			return fdset;
63 	}
64 
65 	return NULL;
66 }
67 
68 static int
69 fdset_insert(struct fdset *fdset)
70 {
71 	int i;
72 
73 	for (i = 0; i < MAX_FDSETS; i++) {
74 		if (fdsets[i] == NULL) {
75 			fdsets[i] = fdset;
76 			return 0;
77 		}
78 	}
79 
80 	return -1;
81 }
82 
83 struct fdset *
84 fdset_init(const char *name)
85 {
86 	struct fdset *fdset;
87 	uint32_t val;
88 	int i;
89 
90 	pthread_mutex_lock(&fdsets_mutex);
91 	fdset = fdset_lookup(name);
92 	if (fdset) {
93 		pthread_mutex_unlock(&fdsets_mutex);
94 		return fdset;
95 	}
96 
97 	fdset = rte_zmalloc(NULL, sizeof(*fdset), 0);
98 	if (!fdset) {
99 		VHOST_FDMAN_LOG(ERR, "failed to alloc fdset %s", name);
100 		goto err_unlock;
101 	}
102 
103 	rte_strscpy(fdset->name, name, RTE_THREAD_NAME_SIZE);
104 
105 	pthread_mutex_init(&fdset->fd_mutex, NULL);
106 
107 	for (i = 0; i < (int)RTE_DIM(fdset->fd); i++) {
108 		fdset->fd[i].fd = -1;
109 		fdset->fd[i].dat = NULL;
110 	}
111 	LIST_INIT(&fdset->fdlist);
112 
113 	/*
114 	 * Any non-zero value would work (see man epoll_create),
115 	 * but pass MAX_FDS for consistency.
116 	 */
117 	fdset->epfd = epoll_create(MAX_FDS);
118 	if (fdset->epfd < 0) {
119 		VHOST_FDMAN_LOG(ERR, "failed to create epoll for %s fdset", name);
120 		goto err_free;
121 	}
122 
123 	if (rte_thread_create_internal_control(&fdset->tid, fdset->name,
124 					fdset_event_dispatch, fdset)) {
125 		VHOST_FDMAN_LOG(ERR, "Failed to create %s event dispatch thread",
126 				fdset->name);
127 		goto err_epoll;
128 	}
129 
130 	if (fdset_insert(fdset)) {
131 		VHOST_FDMAN_LOG(ERR, "Failed to insert fdset %s", name);
132 		goto err_thread;
133 	}
134 
135 	pthread_mutex_unlock(&fdsets_mutex);
136 
137 	return fdset;
138 
139 err_thread:
140 	fdset->destroy = true;
141 	rte_thread_join(fdset->tid, &val);
142 err_epoll:
143 	close(fdset->epfd);
144 err_free:
145 	rte_free(fdset);
146 err_unlock:
147 	pthread_mutex_unlock(&fdsets_mutex);
148 
149 	return NULL;
150 }
151 
152 static int
153 fdset_insert_entry(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
154 {
155 	struct fdentry *pfdentry;
156 
157 	if (pfdset->next_free_idx >= (int)RTE_DIM(pfdset->fd))
158 		return -1;
159 
160 	pfdentry = &pfdset->fd[pfdset->next_free_idx];
161 	pfdentry->fd  = fd;
162 	pfdentry->rcb = rcb;
163 	pfdentry->wcb = wcb;
164 	pfdentry->dat = dat;
165 
166 	LIST_INSERT_HEAD(&pfdset->fdlist, pfdentry, next);
167 
168 	/* Find next free slot */
169 	pfdset->next_free_idx++;
170 	for (; pfdset->next_free_idx < (int)RTE_DIM(pfdset->fd); pfdset->next_free_idx++) {
171 		if (pfdset->fd[pfdset->next_free_idx].fd != -1)
172 			continue;
173 		break;
174 	}
175 
176 	return 0;
177 }
178 
179 static void
180 fdset_remove_entry(struct fdset *pfdset, struct fdentry *pfdentry)
181 {
182 	int entry_idx;
183 
184 	pfdentry->fd = -1;
185 	pfdentry->rcb = pfdentry->wcb = NULL;
186 	pfdentry->dat = NULL;
187 
188 	entry_idx = pfdentry - pfdset->fd;
189 	if (entry_idx < pfdset->next_free_idx)
190 		pfdset->next_free_idx = entry_idx;
191 
192 	LIST_REMOVE(pfdentry, next);
193 }
194 
195 static struct fdentry *
196 fdset_find_entry_locked(struct fdset *pfdset, int fd)
197 {
198 	struct fdentry *pfdentry;
199 
200 	LIST_FOREACH(pfdentry, &pfdset->fdlist, next) {
201 		if (pfdentry->fd != fd)
202 			continue;
203 		return pfdentry;
204 	}
205 
206 	return NULL;
207 }
208 
209 /**
210  * Register the fd in the fdset with read/write handler and context.
211  */
212 int
213 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
214 {
215 	struct epoll_event ev;
216 	struct fdentry *pfdentry;
217 	int ret = 0;
218 
219 	if (pfdset == NULL || fd == -1) {
220 		ret = -1;
221 		goto out;
222 	}
223 
224 	pthread_mutex_lock(&pfdset->fd_mutex);
225 	ret = fdset_insert_entry(pfdset, fd, rcb, wcb, dat);
226 	if (ret < 0) {
227 		VHOST_FDMAN_LOG(ERR, "failed to insert fdset entry");
228 		pthread_mutex_unlock(&pfdset->fd_mutex);
229 		goto out;
230 	}
231 	pthread_mutex_unlock(&pfdset->fd_mutex);
232 
233 	ev.events = EPOLLERR;
234 	ev.events |= rcb ? EPOLLIN : 0;
235 	ev.events |= wcb ? EPOLLOUT : 0;
236 	ev.data.fd = fd;
237 
238 	ret = epoll_ctl(pfdset->epfd, EPOLL_CTL_ADD, fd, &ev);
239 	if (ret < 0) {
240 		VHOST_FDMAN_LOG(ERR, "could not add %d fd to %d epfd: %s",
241 			fd, pfdset->epfd, strerror(errno));
242 		goto out_remove;
243 	}
244 
245 	return 0;
246 out_remove:
247 	pthread_mutex_lock(&pfdset->fd_mutex);
248 	pfdentry = fdset_find_entry_locked(pfdset, fd);
249 	if (pfdentry)
250 		fdset_remove_entry(pfdset, pfdentry);
251 	pthread_mutex_unlock(&pfdset->fd_mutex);
252 out:
253 	return ret;
254 }
255 
256 static void
257 fdset_del_locked(struct fdset *pfdset, struct fdentry *pfdentry)
258 {
259 	if (epoll_ctl(pfdset->epfd, EPOLL_CTL_DEL, pfdentry->fd, NULL) == -1) {
260 		if (errno == EBADF) /* File might have already been closed. */
261 			VHOST_FDMAN_LOG(DEBUG, "could not remove %d fd from %d epfd: %s",
262 				pfdentry->fd, pfdset->epfd, strerror(errno));
263 		else
264 			VHOST_FDMAN_LOG(ERR, "could not remove %d fd from %d epfd: %s",
265 				pfdentry->fd, pfdset->epfd, strerror(errno));
266 	}
267 
268 	fdset_remove_entry(pfdset, pfdentry);
269 }
270 
271 void
272 fdset_del(struct fdset *pfdset, int fd)
273 {
274 	struct fdentry *pfdentry;
275 
276 	if (pfdset == NULL || fd == -1)
277 		return;
278 
279 	do {
280 		pthread_mutex_lock(&pfdset->fd_mutex);
281 		pfdentry = fdset_find_entry_locked(pfdset, fd);
282 		if (pfdentry != NULL && pfdentry->busy == 0) {
283 			fdset_del_locked(pfdset, pfdentry);
284 			pfdentry = NULL;
285 		}
286 		pthread_mutex_unlock(&pfdset->fd_mutex);
287 	} while (pfdentry != NULL);
288 }
289 
290 /**
291  *  Unregister the fd from the fdset.
292  *
293  *  If parameters are invalid, return directly -2.
294  *  And check whether fd is busy, if yes, return -1.
295  *  Otherwise, try to delete the fd from fdset and
296  *  return true.
297  */
298 int
299 fdset_try_del(struct fdset *pfdset, int fd)
300 {
301 	struct fdentry *pfdentry;
302 
303 	if (pfdset == NULL || fd == -1)
304 		return -2;
305 
306 	pthread_mutex_lock(&pfdset->fd_mutex);
307 	pfdentry = fdset_find_entry_locked(pfdset, fd);
308 	if (pfdentry != NULL && pfdentry->busy != 0) {
309 		pthread_mutex_unlock(&pfdset->fd_mutex);
310 		return -1;
311 	}
312 
313 	if (pfdentry != NULL)
314 		fdset_del_locked(pfdset, pfdentry);
315 
316 	pthread_mutex_unlock(&pfdset->fd_mutex);
317 	return 0;
318 }
319 
320 /**
321  * This functions runs in infinite blocking loop until there is no fd in
322  * pfdset. It calls corresponding r/w handler if there is event on the fd.
323  *
324  * Before the callback is called, we set the flag to busy status; If other
325  * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
326  * will wait until the flag is reset to zero(which indicates the callback is
327  * finished), then it could free the context after fdset_del.
328  */
329 static uint32_t
330 fdset_event_dispatch(void *arg)
331 {
332 	int i;
333 	fd_cb rcb, wcb;
334 	void *dat;
335 	int fd, numfds;
336 	int remove1, remove2;
337 	struct fdset *pfdset = arg;
338 
339 	if (pfdset == NULL)
340 		return 0;
341 
342 	while (1) {
343 		struct epoll_event events[MAX_FDS];
344 		struct fdentry *pfdentry;
345 
346 		numfds = epoll_wait(pfdset->epfd, events, RTE_DIM(events), 1000);
347 		if (numfds < 0)
348 			continue;
349 
350 		for (i = 0; i < numfds; i++) {
351 			pthread_mutex_lock(&pfdset->fd_mutex);
352 
353 			fd = events[i].data.fd;
354 			pfdentry = fdset_find_entry_locked(pfdset, fd);
355 			if (pfdentry == NULL) {
356 				pthread_mutex_unlock(&pfdset->fd_mutex);
357 				continue;
358 			}
359 
360 			remove1 = remove2 = 0;
361 
362 			rcb = pfdentry->rcb;
363 			wcb = pfdentry->wcb;
364 			dat = pfdentry->dat;
365 			pfdentry->busy = 1;
366 
367 			pthread_mutex_unlock(&pfdset->fd_mutex);
368 
369 			if (rcb && events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP))
370 				rcb(fd, dat, &remove1);
371 			if (wcb && events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP))
372 				wcb(fd, dat, &remove2);
373 			pfdentry->busy = 0;
374 			/*
375 			 * fdset_del needs to check busy flag.
376 			 * We don't allow fdset_del to be called in callback
377 			 * directly.
378 			 */
379 			/*
380 			 * A concurrent fdset_del may have been waiting for the
381 			 * fdentry not to be busy, so we can't call
382 			 * fdset_del_locked().
383 			 */
384 			if (remove1 || remove2)
385 				fdset_del(pfdset, fd);
386 		}
387 
388 		if (pfdset->destroy)
389 			break;
390 	}
391 
392 	return 0;
393 }
394