xref: /dpdk/lib/vhost/fd_man.c (revision 5d52418fa4b9a7f28eaedc1d88ec5cf330381c0e)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdio.h>
6 #include <unistd.h>
7 
8 #include <rte_common.h>
9 #include <rte_log.h>
10 
11 #include "fd_man.h"
12 
13 
14 #define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1
15 
16 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
17 
18 static int
19 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
20 {
21 	int i;
22 
23 	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
24 		;
25 
26 	return i;
27 }
28 
29 static void
30 fdset_move(struct fdset *pfdset, int dst, int src)
31 {
32 	pfdset->fd[dst]    = pfdset->fd[src];
33 	pfdset->rwfds[dst] = pfdset->rwfds[src];
34 }
35 
36 static void
37 fdset_shrink_nolock(struct fdset *pfdset)
38 {
39 	int i;
40 	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
41 
42 	for (i = 0; i < last_valid_idx; i++) {
43 		if (pfdset->fd[i].fd != -1)
44 			continue;
45 
46 		fdset_move(pfdset, i, last_valid_idx);
47 		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
48 	}
49 	pfdset->num = last_valid_idx + 1;
50 }
51 
/*
 * Find deleted fd entries (fd == -1) and remove them by compacting the
 * array. Locked wrapper around fdset_shrink_nolock() for callers that
 * do not already hold fd_mutex.
 */
static void
fdset_shrink(struct fdset *pfdset)
{
	pthread_mutex_lock(&pfdset->fd_mutex);
	fdset_shrink_nolock(pfdset);
	pthread_mutex_unlock(&pfdset->fd_mutex);
}
62 
63 /**
64  * Returns the index in the fdset for a given fd.
65  * @return
66  *   index for the fd, or -1 if fd isn't in the fdset.
67  */
68 static int
69 fdset_find_fd(struct fdset *pfdset, int fd)
70 {
71 	int i;
72 
73 	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
74 		;
75 
76 	return i == pfdset->num ? -1 : i;
77 }
78 
79 static void
80 fdset_add_fd(struct fdset *pfdset, int idx, int fd,
81 	fd_cb rcb, fd_cb wcb, void *dat)
82 {
83 	struct fdentry *pfdentry = &pfdset->fd[idx];
84 	struct pollfd *pfd = &pfdset->rwfds[idx];
85 
86 	pfdentry->fd  = fd;
87 	pfdentry->rcb = rcb;
88 	pfdentry->wcb = wcb;
89 	pfdentry->dat = dat;
90 
91 	pfd->fd = fd;
92 	pfd->events  = rcb ? POLLIN : 0;
93 	pfd->events |= wcb ? POLLOUT : 0;
94 	pfd->revents = 0;
95 }
96 
97 void
98 fdset_init(struct fdset *pfdset)
99 {
100 	int i;
101 
102 	if (pfdset == NULL)
103 		return;
104 
105 	for (i = 0; i < MAX_FDS; i++) {
106 		pfdset->fd[i].fd = -1;
107 		pfdset->fd[i].dat = NULL;
108 	}
109 	pfdset->num = 0;
110 }
111 
/**
 * Register the fd in the fdset with read/write handler and context.
 *
 * @param pfdset fdset to add to; its fd_mutex is taken internally
 * @param fd     file descriptor to watch (-1 is rejected)
 * @param rcb    read-ready callback, may be NULL
 * @param wcb    write-ready callback, may be NULL
 * @param dat    opaque context handed to the callbacks
 * @return 0 on success, -1 on bad arguments, -2 when the set is full
 *         even after compaction
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	int i;

	if (pfdset == NULL || fd == -1)
		return -1;

	pthread_mutex_lock(&pfdset->fd_mutex);
	/* Claim the next free slot, or -1 if the array is exhausted. */
	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
	if (i == -1) {
		/*
		 * Array looks full: compact out deleted entries and retry.
		 * fd_pooling_mutex keeps the dispatch thread's poll() from
		 * reading rwfds while entries are being moved around.
		 */
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		fdset_shrink_nolock(pfdset);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
		if (i == -1) {
			pthread_mutex_unlock(&pfdset->fd_mutex);
			return -2;
		}
	}

	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
	pthread_mutex_unlock(&pfdset->fd_mutex);

	return 0;
}
141 
/**
 *  Unregister the fd from the fdset.
 *  Returns context of a given fd or NULL.
 *
 *  Retries until the entry is no longer busy: while a read/write
 *  callback is running on this fd (busy == 1) the slot is left intact
 *  and the lock is dropped and re-taken, so the caller can safely free
 *  the returned context once this function returns.
 */
void *
fdset_del(struct fdset *pfdset, int fd)
{
	int i;
	void *dat = NULL;

	if (pfdset == NULL || fd == -1)
		return NULL;

	do {
		pthread_mutex_lock(&pfdset->fd_mutex);

		i = fdset_find_fd(pfdset, fd);
		if (i != -1 && pfdset->fd[i].busy == 0) {
			/* busy indicates r/wcb is executing! */
			dat = pfdset->fd[i].dat;
			pfdset->fd[i].fd = -1;
			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
			pfdset->fd[i].dat = NULL;
			/* Entry cleared: force i to -1 so the loop ends. */
			i = -1;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
		/* i stays != -1 only when the entry was found but busy. */
	} while (i != -1);

	return dat;
}
172 
173 /**
174  *  Unregister the fd from the fdset.
175  *
176  *  If parameters are invalid, return directly -2.
177  *  And check whether fd is busy, if yes, return -1.
178  *  Otherwise, try to delete the fd from fdset and
179  *  return true.
180  */
181 int
182 fdset_try_del(struct fdset *pfdset, int fd)
183 {
184 	int i;
185 
186 	if (pfdset == NULL || fd == -1)
187 		return -2;
188 
189 	pthread_mutex_lock(&pfdset->fd_mutex);
190 	i = fdset_find_fd(pfdset, fd);
191 	if (i != -1 && pfdset->fd[i].busy) {
192 		pthread_mutex_unlock(&pfdset->fd_mutex);
193 		return -1;
194 	}
195 
196 	if (i != -1) {
197 		pfdset->fd[i].fd = -1;
198 		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
199 		pfdset->fd[i].dat = NULL;
200 	}
201 
202 	pthread_mutex_unlock(&pfdset->fd_mutex);
203 	return 0;
204 }
205 
/**
 * This functions runs in infinite blocking loop until there is no fd in
 * pfdset. It calls corresponding r/w handler if there is event on the fd.
 *
 * Before the callback is called, we set the flag to busy status; If other
 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
 * will wait until the flag is reset to zero(which indicates the callback is
 * finished), then it could free the context after fdset_del.
 *
 * @param arg pointer to the struct fdset to service; returns 0 immediately
 *            when arg is NULL, otherwise never returns normally.
 */
uint32_t
fdset_event_dispatch(void *arg)
{
	int i;
	struct pollfd *pfd;
	struct fdentry *pfdentry;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;
	int need_shrink;
	struct fdset *pfdset = arg;
	int val;

	if (pfdset == NULL)
		return 0;

	while (1) {

		/*
		 * When poll is blocked, other threads might unregister
		 * listenfds from and register new listenfds into fdset.
		 * When poll returns, the entries for listenfds in the fdset
		 * might have been updated. It is ok if there is unwanted call
		 * for new listenfds.
		 */
		pthread_mutex_lock(&pfdset->fd_mutex);
		numfds = pfdset->num;
		pthread_mutex_unlock(&pfdset->fd_mutex);

		/*
		 * fd_pooling_mutex serializes poll() against the array
		 * compaction done in fdset_add()/fdset_shrink(). The 1s
		 * timeout bounds how long a new fd waits to be polled.
		 */
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		if (val < 0)
			continue;

		need_shrink = 0;
		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			pfdentry = &pfdset->fd[i];
			fd = pfdentry->fd;
			pfd = &pfdset->rwfds[i];

			/* Deleted entry: remember to compact later. */
			if (fd < 0) {
				need_shrink = 1;
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			if (!pfd->revents) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			/* Snapshot callbacks/context under the lock. */
			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			/* Mark busy BEFORE unlocking so fdset_del waits. */
			pfdentry->busy = 1;

			pthread_mutex_unlock(&pfdset->fd_mutex);

			/* Error conditions are delivered to both handlers. */
			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
				rcb(fd, dat, &remove1);
			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check busy flag.
			 * We don't allow fdset_del to be called in callback
			 * directly.
			 */
			/*
			 * When we are to clean up the fd from fdset,
			 * because the fd is closed in the cb,
			 * the old fd val could be reused by when creates new
			 * listen fd in another thread, we couldn't call
			 * fdset_del.
			 */
			if (remove1 || remove2) {
				pfdentry->fd = -1;
				need_shrink = 1;
			}
		}

		if (need_shrink)
			fdset_shrink(pfdset);
	}

	return 0;
}
308 
309 static void
310 fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
311 		   int *remove __rte_unused)
312 {
313 	char charbuf[16];
314 	int r = read(readfd, charbuf, sizeof(charbuf));
315 	/*
316 	 * Just an optimization, we don't care if read() failed
317 	 * so ignore explicitly its return value to make the
318 	 * compiler happy
319 	 */
320 	RTE_SET_USED(r);
321 }
322 
/*
 * Tear down the self-notification pipe: unregister the read end from
 * the fdset first (fdset_del retries while a callback is in flight),
 * then close both ends. Order matters — deleting before closing keeps
 * the dispatch thread from polling an already-closed descriptor.
 */
void
fdset_pipe_uninit(struct fdset *fdset)
{
	fdset_del(fdset, fdset->u.readfd);
	close(fdset->u.readfd);
	close(fdset->u.writefd);
}
330 
331 int
332 fdset_pipe_init(struct fdset *fdset)
333 {
334 	int ret;
335 
336 	if (pipe(fdset->u.pipefd) < 0) {
337 		RTE_LOG(ERR, VHOST_FDMAN,
338 			"failed to create pipe for vhost fdset\n");
339 		return -1;
340 	}
341 
342 	ret = fdset_add(fdset, fdset->u.readfd,
343 			fdset_pipe_read_cb, NULL, NULL);
344 
345 	if (ret < 0) {
346 		RTE_LOG(ERR, VHOST_FDMAN,
347 			"failed to add pipe readfd %d into vhost server fdset\n",
348 			fdset->u.readfd);
349 
350 		fdset_pipe_uninit(fdset);
351 		return -1;
352 	}
353 
354 	return 0;
355 }
356 
357 void
358 fdset_pipe_notify(struct fdset *fdset)
359 {
360 	int r = write(fdset->u.writefd, "1", 1);
361 	/*
362 	 * Just an optimization, we don't care if write() failed
363 	 * so ignore explicitly its return value to make the
364 	 * compiler happy
365 	 */
366 	RTE_SET_USED(r);
367 
368 }
369