xref: /dpdk/lib/vhost/fd_man.c (revision e9fd1ebf981f361844aea9ec94e17f4bda5e1479)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <unistd.h>
7 
8 #include <rte_common.h>
9 #include <rte_log.h>
10 
11 #include "fd_man.h"
12 
13 RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
14 #define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype
15 #define VHOST_FDMAN_LOG(level, ...) \
16 	RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)
17 
18 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
19 
20 static int
21 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
22 {
23 	int i;
24 
25 	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
26 		;
27 
28 	return i;
29 }
30 
31 static void
32 fdset_move(struct fdset *pfdset, int dst, int src)
33 {
34 	pfdset->fd[dst]    = pfdset->fd[src];
35 	pfdset->rwfds[dst] = pfdset->rwfds[src];
36 }
37 
38 static void
39 fdset_shrink_nolock(struct fdset *pfdset)
40 {
41 	int i;
42 	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
43 
44 	for (i = 0; i < last_valid_idx; i++) {
45 		if (pfdset->fd[i].fd != -1)
46 			continue;
47 
48 		fdset_move(pfdset, i, last_valid_idx);
49 		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
50 	}
51 	pfdset->num = last_valid_idx + 1;
52 }
53 
/*
 * Find deleted fd entries and remove them.
 * Locked wrapper around fdset_shrink_nolock(): takes fd_mutex so the
 * compaction cannot race with fdset_add()/fdset_del() on other threads.
 */
static void
fdset_shrink(struct fdset *pfdset)
{
	pthread_mutex_lock(&pfdset->fd_mutex);
	fdset_shrink_nolock(pfdset);
	pthread_mutex_unlock(&pfdset->fd_mutex);
}
64 
65 /**
66  * Returns the index in the fdset for a given fd.
67  * @return
68  *   index for the fd, or -1 if fd isn't in the fdset.
69  */
70 static int
71 fdset_find_fd(struct fdset *pfdset, int fd)
72 {
73 	int i;
74 
75 	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
76 		;
77 
78 	return i == pfdset->num ? -1 : i;
79 }
80 
81 static void
82 fdset_add_fd(struct fdset *pfdset, int idx, int fd,
83 	fd_cb rcb, fd_cb wcb, void *dat)
84 {
85 	struct fdentry *pfdentry = &pfdset->fd[idx];
86 	struct pollfd *pfd = &pfdset->rwfds[idx];
87 
88 	pfdentry->fd  = fd;
89 	pfdentry->rcb = rcb;
90 	pfdentry->wcb = wcb;
91 	pfdentry->dat = dat;
92 
93 	pfd->fd = fd;
94 	pfd->events  = rcb ? POLLIN : 0;
95 	pfd->events |= wcb ? POLLOUT : 0;
96 	pfd->revents = 0;
97 }
98 
99 void
100 fdset_init(struct fdset *pfdset)
101 {
102 	int i;
103 
104 	if (pfdset == NULL)
105 		return;
106 
107 	for (i = 0; i < MAX_FDS; i++) {
108 		pfdset->fd[i].fd = -1;
109 		pfdset->fd[i].dat = NULL;
110 	}
111 	pfdset->num = 0;
112 }
113 
/**
 * Register the fd in the fdset with read/write handler and context.
 *
 * Returns 0 on success, -1 on invalid arguments (NULL fdset or fd == -1),
 * -2 when the fdset is full even after compaction.
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	int i;

	if (pfdset == NULL || fd == -1)
		return -1;

	pthread_mutex_lock(&pfdset->fd_mutex);
	/* Claim the next free slot at the tail, or -1 if the array is full. */
	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
	if (i == -1) {
		/*
		 * Array looks full: compact out deleted (fd == -1) entries
		 * and retry. fd_pooling_mutex is taken so the dispatch
		 * thread is not inside poll() on rwfds while slots move.
		 */
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		fdset_shrink_nolock(pfdset);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
		if (i == -1) {
			/* Genuinely full: no deleted entries to reclaim. */
			pthread_mutex_unlock(&pfdset->fd_mutex);
			return -2;
		}
	}

	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
	pthread_mutex_unlock(&pfdset->fd_mutex);

	return 0;
}
143 
/**
 *  Unregister the fd from the fdset.
 *  Returns context of a given fd or NULL.
 *
 *  Blocks (spins, re-taking fd_mutex each iteration) while the entry is
 *  busy, i.e. while the dispatch thread is running its r/w callback, so
 *  that on return the caller can safely free the context.
 */
void *
fdset_del(struct fdset *pfdset, int fd)
{
	int i;
	void *dat = NULL;

	if (pfdset == NULL || fd == -1)
		return NULL;

	do {
		pthread_mutex_lock(&pfdset->fd_mutex);

		i = fdset_find_fd(pfdset, fd);
		if (i != -1 && pfdset->fd[i].busy == 0) {
			/* busy indicates r/wcb is executing! */
			dat = pfdset->fd[i].dat;
			/* Mark the slot deleted; fdset_shrink reclaims it later. */
			pfdset->fd[i].fd = -1;
			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
			pfdset->fd[i].dat = NULL;
			/* Force loop exit: entry was found and cleared. */
			i = -1;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
	} while (i != -1);

	return dat;
}
174 
175 /**
176  *  Unregister the fd from the fdset.
177  *
178  *  If parameters are invalid, return directly -2.
179  *  And check whether fd is busy, if yes, return -1.
180  *  Otherwise, try to delete the fd from fdset and
181  *  return true.
182  */
183 int
184 fdset_try_del(struct fdset *pfdset, int fd)
185 {
186 	int i;
187 
188 	if (pfdset == NULL || fd == -1)
189 		return -2;
190 
191 	pthread_mutex_lock(&pfdset->fd_mutex);
192 	i = fdset_find_fd(pfdset, fd);
193 	if (i != -1 && pfdset->fd[i].busy) {
194 		pthread_mutex_unlock(&pfdset->fd_mutex);
195 		return -1;
196 	}
197 
198 	if (i != -1) {
199 		pfdset->fd[i].fd = -1;
200 		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
201 		pfdset->fd[i].dat = NULL;
202 	}
203 
204 	pthread_mutex_unlock(&pfdset->fd_mutex);
205 	return 0;
206 }
207 
/**
 * This functions runs in infinite blocking loop until there is no fd in
 * pfdset. It calls corresponding r/w handler if there is event on the fd.
 *
 * Before the callback is called, we set the flag to busy status; If other
 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
 * will wait until the flag is reset to zero(which indicates the callback is
 * finished), then it could free the context after fdset_del.
 */
uint32_t
fdset_event_dispatch(void *arg)
{
	int i;
	struct pollfd *pfd;
	struct fdentry *pfdentry;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;   /* set by rcb/wcb to request entry removal */
	int need_shrink;
	struct fdset *pfdset = arg;
	int val;

	if (pfdset == NULL)
		return 0;

	while (1) {

		/*
		 * When poll is blocked, other threads might unregister
		 * listenfds from and register new listenfds into fdset.
		 * When poll returns, the entries for listenfds in the fdset
		 * might have been updated. It is ok if there is unwanted call
		 * for new listenfds.
		 */
		pthread_mutex_lock(&pfdset->fd_mutex);
		numfds = pfdset->num;
		pthread_mutex_unlock(&pfdset->fd_mutex);

		/*
		 * fd_pooling_mutex keeps fdset_add() from compacting rwfds
		 * while poll() is reading it; 1s timeout bounds how long a
		 * compaction request can be stalled.
		 */
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		if (val < 0)
			continue;

		need_shrink = 0;
		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			pfdentry = &pfdset->fd[i];
			fd = pfdentry->fd;
			pfd = &pfdset->rwfds[i];

			/* Entry deleted since poll(); schedule compaction. */
			if (fd < 0) {
				need_shrink = 1;
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			if (!pfd->revents) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			/* Snapshot callbacks/context under the lock... */
			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			/* ...then drop it so callbacks may use fdset_add(). */
			pthread_mutex_unlock(&pfdset->fd_mutex);

			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
				rcb(fd, dat, &remove1);
			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
				wcb(fd, dat, &remove2);
			/*
			 * NOTE(review): busy is cleared without holding
			 * fd_mutex; fdset_del re-checks it under the lock
			 * each spin, which presumably makes this safe —
			 * confirm before changing the protocol.
			 */
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check busy flag.
			 * We don't allow fdset_del to be called in callback
			 * directly.
			 */
			/*
			 * When we are to clean up the fd from fdset,
			 * because the fd is closed in the cb,
			 * the old fd val could be reused by when creates new
			 * listen fd in another thread, we couldn't call
			 * fdset_del.
			 */
			if (remove1 || remove2) {
				pfdentry->fd = -1;
				need_shrink = 1;
			}
		}

		if (need_shrink)
			fdset_shrink(pfdset);
	}

	return 0;
}
310 
311 static void
312 fdset_pipe_read_cb(int readfd, void *dat,
313 		   int *remove __rte_unused)
314 {
315 	char charbuf[16];
316 	struct fdset *fdset = dat;
317 	int r = read(readfd, charbuf, sizeof(charbuf));
318 	/*
319 	 * Just an optimization, we don't care if read() failed
320 	 * so ignore explicitly its return value to make the
321 	 * compiler happy
322 	 */
323 	RTE_SET_USED(r);
324 
325 	pthread_mutex_lock(&fdset->sync_mutex);
326 	fdset->sync = true;
327 	pthread_cond_broadcast(&fdset->sync_cond);
328 	pthread_mutex_unlock(&fdset->sync_mutex);
329 }
330 
331 void
332 fdset_pipe_uninit(struct fdset *fdset)
333 {
334 	fdset_del(fdset, fdset->u.readfd);
335 	close(fdset->u.readfd);
336 	close(fdset->u.writefd);
337 }
338 
339 int
340 fdset_pipe_init(struct fdset *fdset)
341 {
342 	int ret;
343 
344 	if (pipe(fdset->u.pipefd) < 0) {
345 		VHOST_FDMAN_LOG(ERR,
346 			"failed to create pipe for vhost fdset");
347 		return -1;
348 	}
349 
350 	ret = fdset_add(fdset, fdset->u.readfd,
351 			fdset_pipe_read_cb, NULL, fdset);
352 
353 	if (ret < 0) {
354 		VHOST_FDMAN_LOG(ERR,
355 			"failed to add pipe readfd %d into vhost server fdset",
356 			fdset->u.readfd);
357 
358 		fdset_pipe_uninit(fdset);
359 		return -1;
360 	}
361 
362 	return 0;
363 }
364 
365 void
366 fdset_pipe_notify(struct fdset *fdset)
367 {
368 	int r = write(fdset->u.writefd, "1", 1);
369 	/*
370 	 * Just an optimization, we don't care if write() failed
371 	 * so ignore explicitly its return value to make the
372 	 * compiler happy
373 	 */
374 	RTE_SET_USED(r);
375 }
376 
/*
 * Wake the dispatch thread and block until it acknowledges: sync is
 * cleared, the pipe is kicked, then we wait on sync_cond until
 * fdset_pipe_read_cb() (run by the dispatch thread) sets sync back to
 * true. The while-loop re-checks the predicate, guarding against
 * spurious condition-variable wakeups.
 */
void
fdset_pipe_notify_sync(struct fdset *fdset)
{
	pthread_mutex_lock(&fdset->sync_mutex);

	fdset->sync = false;
	fdset_pipe_notify(fdset);

	while (!fdset->sync)
		pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);

	pthread_mutex_unlock(&fdset->sync_mutex);
}
390