1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdint.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <sys/socket.h> 9 #include <sys/time.h> 10 #include <sys/types.h> 11 #include <unistd.h> 12 #include <string.h> 13 14 #include <rte_common.h> 15 #include <rte_log.h> 16 17 #include "fd_man.h" 18 19 20 #define RTE_LOGTYPE_VHOST_FDMAN RTE_LOGTYPE_USER1 21 22 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) 23 24 static int 25 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx) 26 { 27 int i; 28 29 for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--) 30 ; 31 32 return i; 33 } 34 35 static void 36 fdset_move(struct fdset *pfdset, int dst, int src) 37 { 38 pfdset->fd[dst] = pfdset->fd[src]; 39 pfdset->rwfds[dst] = pfdset->rwfds[src]; 40 } 41 42 static void 43 fdset_shrink_nolock(struct fdset *pfdset) 44 { 45 int i; 46 int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1); 47 48 for (i = 0; i < last_valid_idx; i++) { 49 if (pfdset->fd[i].fd != -1) 50 continue; 51 52 fdset_move(pfdset, i, last_valid_idx); 53 last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1); 54 } 55 pfdset->num = last_valid_idx + 1; 56 } 57 58 /* 59 * Find deleted fd entries and remove them 60 */ 61 static void 62 fdset_shrink(struct fdset *pfdset) 63 { 64 pthread_mutex_lock(&pfdset->fd_mutex); 65 fdset_shrink_nolock(pfdset); 66 pthread_mutex_unlock(&pfdset->fd_mutex); 67 } 68 69 /** 70 * Returns the index in the fdset for a given fd. 71 * @return 72 * index for the fd, or -1 if fd isn't in the fdset. 73 */ 74 static int 75 fdset_find_fd(struct fdset *pfdset, int fd) 76 { 77 int i; 78 79 for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++) 80 ; 81 82 return i == pfdset->num ? -1 : i; 83 } 84 85 static void 86 fdset_add_fd(struct fdset *pfdset, int idx, int fd, 87 fd_cb rcb, fd_cb wcb, void *dat) 88 { 89 struct fdentry *pfdentry = &pfdset->fd[idx]; 90 struct pollfd *pfd = &pfdset->rwfds[idx]; 91 92 pfdentry->fd = fd; 93 pfdentry->rcb = rcb; 94 pfdentry->wcb = wcb; 95 pfdentry->dat = dat; 96 97 pfd->fd = fd; 98 pfd->events = rcb ? POLLIN : 0; 99 pfd->events |= wcb ? POLLOUT : 0; 100 pfd->revents = 0; 101 } 102 103 void 104 fdset_init(struct fdset *pfdset) 105 { 106 int i; 107 108 if (pfdset == NULL) 109 return; 110 111 for (i = 0; i < MAX_FDS; i++) { 112 pfdset->fd[i].fd = -1; 113 pfdset->fd[i].dat = NULL; 114 } 115 pfdset->num = 0; 116 } 117 118 /** 119 * Register the fd in the fdset with read/write handler and context. 120 */ 121 int 122 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) 123 { 124 int i; 125 126 if (pfdset == NULL || fd == -1) 127 return -1; 128 129 pthread_mutex_lock(&pfdset->fd_mutex); 130 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; 131 if (i == -1) { 132 pthread_mutex_lock(&pfdset->fd_pooling_mutex); 133 fdset_shrink_nolock(pfdset); 134 pthread_mutex_unlock(&pfdset->fd_pooling_mutex); 135 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; 136 if (i == -1) { 137 pthread_mutex_unlock(&pfdset->fd_mutex); 138 return -2; 139 } 140 } 141 142 fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); 143 pthread_mutex_unlock(&pfdset->fd_mutex); 144 145 return 0; 146 } 147 148 /** 149 * Unregister the fd from the fdset. 150 * Returns context of a given fd or NULL. 151 */ 152 void * 153 fdset_del(struct fdset *pfdset, int fd) 154 { 155 int i; 156 void *dat = NULL; 157 158 if (pfdset == NULL || fd == -1) 159 return NULL; 160 161 do { 162 pthread_mutex_lock(&pfdset->fd_mutex); 163 164 i = fdset_find_fd(pfdset, fd); 165 if (i != -1 && pfdset->fd[i].busy == 0) { 166 /* busy indicates r/wcb is executing! */ 167 dat = pfdset->fd[i].dat; 168 pfdset->fd[i].fd = -1; 169 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; 170 pfdset->fd[i].dat = NULL; 171 i = -1; 172 } 173 pthread_mutex_unlock(&pfdset->fd_mutex); 174 } while (i != -1); 175 176 return dat; 177 } 178 179 /** 180 * Unregister the fd from the fdset. 181 * 182 * If parameters are invalid, return directly -2. 183 * And check whether fd is busy, if yes, return -1. 184 * Otherwise, try to delete the fd from fdset and 185 * return true. 186 */ 187 int 188 fdset_try_del(struct fdset *pfdset, int fd) 189 { 190 int i; 191 192 if (pfdset == NULL || fd == -1) 193 return -2; 194 195 pthread_mutex_lock(&pfdset->fd_mutex); 196 i = fdset_find_fd(pfdset, fd); 197 if (i != -1 && pfdset->fd[i].busy) { 198 pthread_mutex_unlock(&pfdset->fd_mutex); 199 return -1; 200 } 201 202 if (i != -1) { 203 pfdset->fd[i].fd = -1; 204 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; 205 pfdset->fd[i].dat = NULL; 206 } 207 208 pthread_mutex_unlock(&pfdset->fd_mutex); 209 return 0; 210 } 211 212 /** 213 * This functions runs in infinite blocking loop until there is no fd in 214 * pfdset. It calls corresponding r/w handler if there is event on the fd. 215 * 216 * Before the callback is called, we set the flag to busy status; If other 217 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it 218 * will wait until the flag is reset to zero(which indicates the callback is 219 * finished), then it could free the context after fdset_del. 220 */ 221 void * 222 fdset_event_dispatch(void *arg) 223 { 224 int i; 225 struct pollfd *pfd; 226 struct fdentry *pfdentry; 227 fd_cb rcb, wcb; 228 void *dat; 229 int fd, numfds; 230 int remove1, remove2; 231 int need_shrink; 232 struct fdset *pfdset = arg; 233 int val; 234 235 if (pfdset == NULL) 236 return NULL; 237 238 while (1) { 239 240 /* 241 * When poll is blocked, other threads might unregister 242 * listenfds from and register new listenfds into fdset. 243 * When poll returns, the entries for listenfds in the fdset 244 * might have been updated. It is ok if there is unwanted call 245 * for new listenfds. 246 */ 247 pthread_mutex_lock(&pfdset->fd_mutex); 248 numfds = pfdset->num; 249 pthread_mutex_unlock(&pfdset->fd_mutex); 250 251 pthread_mutex_lock(&pfdset->fd_pooling_mutex); 252 val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */); 253 pthread_mutex_unlock(&pfdset->fd_pooling_mutex); 254 if (val < 0) 255 continue; 256 257 need_shrink = 0; 258 for (i = 0; i < numfds; i++) { 259 pthread_mutex_lock(&pfdset->fd_mutex); 260 261 pfdentry = &pfdset->fd[i]; 262 fd = pfdentry->fd; 263 pfd = &pfdset->rwfds[i]; 264 265 if (fd < 0) { 266 need_shrink = 1; 267 pthread_mutex_unlock(&pfdset->fd_mutex); 268 continue; 269 } 270 271 if (!pfd->revents) { 272 pthread_mutex_unlock(&pfdset->fd_mutex); 273 continue; 274 } 275 276 remove1 = remove2 = 0; 277 278 rcb = pfdentry->rcb; 279 wcb = pfdentry->wcb; 280 dat = pfdentry->dat; 281 pfdentry->busy = 1; 282 283 pthread_mutex_unlock(&pfdset->fd_mutex); 284 285 if (rcb && pfd->revents & (POLLIN | FDPOLLERR)) 286 rcb(fd, dat, &remove1); 287 if (wcb && pfd->revents & (POLLOUT | FDPOLLERR)) 288 wcb(fd, dat, &remove2); 289 pfdentry->busy = 0; 290 /* 291 * fdset_del needs to check busy flag. 292 * We don't allow fdset_del to be called in callback 293 * directly. 294 */ 295 /* 296 * When we are to clean up the fd from fdset, 297 * because the fd is closed in the cb, 298 * the old fd val could be reused by when creates new 299 * listen fd in another thread, we couldn't call 300 * fdset_del. 301 */ 302 if (remove1 || remove2) { 303 pfdentry->fd = -1; 304 need_shrink = 1; 305 } 306 } 307 308 if (need_shrink) 309 fdset_shrink(pfdset); 310 } 311 312 return NULL; 313 } 314 315 static void 316 fdset_pipe_read_cb(int readfd, void *dat __rte_unused, 317 int *remove __rte_unused) 318 { 319 char charbuf[16]; 320 int r = read(readfd, charbuf, sizeof(charbuf)); 321 /* 322 * Just an optimization, we don't care if read() failed 323 * so ignore explicitly its return value to make the 324 * compiler happy 325 */ 326 RTE_SET_USED(r); 327 } 328 329 void 330 fdset_pipe_uninit(struct fdset *fdset) 331 { 332 fdset_del(fdset, fdset->u.readfd); 333 close(fdset->u.readfd); 334 close(fdset->u.writefd); 335 } 336 337 int 338 fdset_pipe_init(struct fdset *fdset) 339 { 340 int ret; 341 342 if (pipe(fdset->u.pipefd) < 0) { 343 RTE_LOG(ERR, VHOST_FDMAN, 344 "failed to create pipe for vhost fdset\n"); 345 return -1; 346 } 347 348 ret = fdset_add(fdset, fdset->u.readfd, 349 fdset_pipe_read_cb, NULL, NULL); 350 351 if (ret < 0) { 352 RTE_LOG(ERR, VHOST_FDMAN, 353 "failed to add pipe readfd %d into vhost server fdset\n", 354 fdset->u.readfd); 355 356 fdset_pipe_uninit(fdset); 357 return -1; 358 } 359 360 return 0; 361 } 362 363 void 364 fdset_pipe_notify(struct fdset *fdset) 365 { 366 int r = write(fdset->u.writefd, "1", 1); 367 /* 368 * Just an optimization, we don't care if write() failed 369 * so ignore explicitly its return value to make the 370 * compiler happy 371 */ 372 RTE_SET_USED(r); 373 374 } 375