xref: /dpdk/lib/vhost/fd_man.c (revision 99a2dd955fba6e4cc23b77d590a033650ced9c45)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(c) 2010-2014 Intel Corporation
3  */
4 
5 #include <stdint.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <sys/socket.h>
9 #include <sys/time.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12 #include <string.h>
13 
14 #include <rte_common.h>
15 #include <rte_log.h>
16 
17 #include "fd_man.h"
18 
19 
20 #define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1
21 
22 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
23 
24 static int
25 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
26 {
27 	int i;
28 
29 	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
30 		;
31 
32 	return i;
33 }
34 
35 static void
36 fdset_move(struct fdset *pfdset, int dst, int src)
37 {
38 	pfdset->fd[dst]    = pfdset->fd[src];
39 	pfdset->rwfds[dst] = pfdset->rwfds[src];
40 }
41 
42 static void
43 fdset_shrink_nolock(struct fdset *pfdset)
44 {
45 	int i;
46 	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
47 
48 	for (i = 0; i < last_valid_idx; i++) {
49 		if (pfdset->fd[i].fd != -1)
50 			continue;
51 
52 		fdset_move(pfdset, i, last_valid_idx);
53 		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
54 	}
55 	pfdset->num = last_valid_idx + 1;
56 }
57 
/*
 * Find deleted fd entries (fd == -1) and remove them by compacting the
 * set. Locked wrapper around fdset_shrink_nolock() for callers that do
 * not already hold fd_mutex.
 */
static void
fdset_shrink(struct fdset *pfdset)
{
	pthread_mutex_lock(&pfdset->fd_mutex);
	fdset_shrink_nolock(pfdset);
	pthread_mutex_unlock(&pfdset->fd_mutex);
}
68 
69 /**
70  * Returns the index in the fdset for a given fd.
71  * @return
72  *   index for the fd, or -1 if fd isn't in the fdset.
73  */
74 static int
75 fdset_find_fd(struct fdset *pfdset, int fd)
76 {
77 	int i;
78 
79 	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
80 		;
81 
82 	return i == pfdset->num ? -1 : i;
83 }
84 
85 static void
86 fdset_add_fd(struct fdset *pfdset, int idx, int fd,
87 	fd_cb rcb, fd_cb wcb, void *dat)
88 {
89 	struct fdentry *pfdentry = &pfdset->fd[idx];
90 	struct pollfd *pfd = &pfdset->rwfds[idx];
91 
92 	pfdentry->fd  = fd;
93 	pfdentry->rcb = rcb;
94 	pfdentry->wcb = wcb;
95 	pfdentry->dat = dat;
96 
97 	pfd->fd = fd;
98 	pfd->events  = rcb ? POLLIN : 0;
99 	pfd->events |= wcb ? POLLOUT : 0;
100 	pfd->revents = 0;
101 }
102 
103 void
104 fdset_init(struct fdset *pfdset)
105 {
106 	int i;
107 
108 	if (pfdset == NULL)
109 		return;
110 
111 	for (i = 0; i < MAX_FDS; i++) {
112 		pfdset->fd[i].fd = -1;
113 		pfdset->fd[i].dat = NULL;
114 	}
115 	pfdset->num = 0;
116 }
117 
118 /**
119  * Register the fd in the fdset with read/write handler and context.
120  */
121 int
122 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
123 {
124 	int i;
125 
126 	if (pfdset == NULL || fd == -1)
127 		return -1;
128 
129 	pthread_mutex_lock(&pfdset->fd_mutex);
130 	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
131 	if (i == -1) {
132 		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
133 		fdset_shrink_nolock(pfdset);
134 		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
135 		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
136 		if (i == -1) {
137 			pthread_mutex_unlock(&pfdset->fd_mutex);
138 			return -2;
139 		}
140 	}
141 
142 	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
143 	pthread_mutex_unlock(&pfdset->fd_mutex);
144 
145 	return 0;
146 }
147 
148 /**
149  *  Unregister the fd from the fdset.
150  *  Returns context of a given fd or NULL.
151  */
152 void *
153 fdset_del(struct fdset *pfdset, int fd)
154 {
155 	int i;
156 	void *dat = NULL;
157 
158 	if (pfdset == NULL || fd == -1)
159 		return NULL;
160 
161 	do {
162 		pthread_mutex_lock(&pfdset->fd_mutex);
163 
164 		i = fdset_find_fd(pfdset, fd);
165 		if (i != -1 && pfdset->fd[i].busy == 0) {
166 			/* busy indicates r/wcb is executing! */
167 			dat = pfdset->fd[i].dat;
168 			pfdset->fd[i].fd = -1;
169 			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
170 			pfdset->fd[i].dat = NULL;
171 			i = -1;
172 		}
173 		pthread_mutex_unlock(&pfdset->fd_mutex);
174 	} while (i != -1);
175 
176 	return dat;
177 }
178 
179 /**
180  *  Unregister the fd from the fdset.
181  *
182  *  If parameters are invalid, return directly -2.
183  *  And check whether fd is busy, if yes, return -1.
184  *  Otherwise, try to delete the fd from fdset and
185  *  return true.
186  */
187 int
188 fdset_try_del(struct fdset *pfdset, int fd)
189 {
190 	int i;
191 
192 	if (pfdset == NULL || fd == -1)
193 		return -2;
194 
195 	pthread_mutex_lock(&pfdset->fd_mutex);
196 	i = fdset_find_fd(pfdset, fd);
197 	if (i != -1 && pfdset->fd[i].busy) {
198 		pthread_mutex_unlock(&pfdset->fd_mutex);
199 		return -1;
200 	}
201 
202 	if (i != -1) {
203 		pfdset->fd[i].fd = -1;
204 		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
205 		pfdset->fd[i].dat = NULL;
206 	}
207 
208 	pthread_mutex_unlock(&pfdset->fd_mutex);
209 	return 0;
210 }
211 
/**
 * This functions runs in infinite blocking loop until there is no fd in
 * pfdset. It calls corresponding r/w handler if there is event on the fd.
 *
 * Before the callback is called, we set the flag to busy status; If other
 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
 * will wait until the flag is reset to zero(which indicates the callback is
 * finished), then it could free the context after fdset_del.
 *
 * @param arg
 *   the struct fdset to dispatch on (thread start routine signature).
 * @return
 *   NULL (only on invalid arg; the dispatch loop itself never returns).
 */
void *
fdset_event_dispatch(void *arg)
{
	int i;
	struct pollfd *pfd;
	struct fdentry *pfdentry;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;
	int need_shrink;
	struct fdset *pfdset = arg;
	int val;

	if (pfdset == NULL)
		return NULL;

	while (1) {

		/*
		 * When poll is blocked, other threads might unregister
		 * listenfds from and register new listenfds into fdset.
		 * When poll returns, the entries for listenfds in the fdset
		 * might have been updated. It is ok if there is unwanted call
		 * for new listenfds.
		 */
		pthread_mutex_lock(&pfdset->fd_mutex);
		numfds = pfdset->num;
		pthread_mutex_unlock(&pfdset->fd_mutex);

		/*
		 * fd_pooling_mutex excludes fdset_add's shrink while rwfds is
		 * handed to poll(). 1s timeout bounds how long a concurrent
		 * fdset_add can be blocked.
		 */
		pthread_mutex_lock(&pfdset->fd_pooling_mutex);
		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
		pthread_mutex_unlock(&pfdset->fd_pooling_mutex);
		if (val < 0)
			continue;

		need_shrink = 0;
		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			pfdentry = &pfdset->fd[i];
			fd = pfdentry->fd;
			pfd = &pfdset->rwfds[i];

			/* entry was deleted while we were polling */
			if (fd < 0) {
				need_shrink = 1;
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			if (!pfd->revents) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			/* snapshot the entry under the lock ... */
			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			/* ... then run callbacks unlocked; busy=1 makes
			 * fdset_del spin instead of freeing dat under us.
			 */
			pthread_mutex_unlock(&pfdset->fd_mutex);

			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
				rcb(fd, dat, &remove1);
			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check busy flag.
			 * We don't allow fdset_del to be called in callback
			 * directly.
			 */
			/*
			 * When a callback asks for its fd to be removed, the
			 * fd was already closed inside the callback, and its
			 * numeric value may have been reused by a new fd
			 * created in another thread. Calling fdset_del(fd)
			 * here could therefore delete the wrong entry, so we
			 * invalidate this entry by index instead.
			 */
			if (remove1 || remove2) {
				pfdentry->fd = -1;
				need_shrink = 1;
			}
		}

		if (need_shrink)
			fdset_shrink(pfdset);
	}

	return NULL;
}
314 
315 static void
316 fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
317 		   int *remove __rte_unused)
318 {
319 	char charbuf[16];
320 	int r = read(readfd, charbuf, sizeof(charbuf));
321 	/*
322 	 * Just an optimization, we don't care if read() failed
323 	 * so ignore explicitly its return value to make the
324 	 * compiler happy
325 	 */
326 	RTE_SET_USED(r);
327 }
328 
329 void
330 fdset_pipe_uninit(struct fdset *fdset)
331 {
332 	fdset_del(fdset, fdset->u.readfd);
333 	close(fdset->u.readfd);
334 	close(fdset->u.writefd);
335 }
336 
337 int
338 fdset_pipe_init(struct fdset *fdset)
339 {
340 	int ret;
341 
342 	if (pipe(fdset->u.pipefd) < 0) {
343 		RTE_LOG(ERR, VHOST_FDMAN,
344 			"failed to create pipe for vhost fdset\n");
345 		return -1;
346 	}
347 
348 	ret = fdset_add(fdset, fdset->u.readfd,
349 			fdset_pipe_read_cb, NULL, NULL);
350 
351 	if (ret < 0) {
352 		RTE_LOG(ERR, VHOST_FDMAN,
353 			"failed to add pipe readfd %d into vhost server fdset\n",
354 			fdset->u.readfd);
355 
356 		fdset_pipe_uninit(fdset);
357 		return -1;
358 	}
359 
360 	return 0;
361 }
362 
363 void
364 fdset_pipe_notify(struct fdset *fdset)
365 {
366 	int r = write(fdset->u.writefd, "1", 1);
367 	/*
368 	 * Just an optimization, we don't care if write() failed
369 	 * so ignore explicitly its return value to make the
370 	 * compiler happy
371 	 */
372 	RTE_SET_USED(r);
373 
374 }
375