1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2010-2014 Intel Corporation 3 */ 4 5 #include <stdio.h> 6 #include <unistd.h> 7 8 #include <rte_common.h> 9 #include <rte_log.h> 10 11 #include "fd_man.h" 12 13 RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO); 14 #define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype 15 #define VHOST_FDMAN_LOG(level, ...) \ 16 RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__) 17 18 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL) 19 20 static int 21 get_last_valid_idx(struct fdset *pfdset, int last_valid_idx) 22 { 23 int i; 24 25 for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--) 26 ; 27 28 return i; 29 } 30 31 static void 32 fdset_move(struct fdset *pfdset, int dst, int src) 33 { 34 pfdset->fd[dst] = pfdset->fd[src]; 35 pfdset->rwfds[dst] = pfdset->rwfds[src]; 36 } 37 38 static void 39 fdset_shrink_nolock(struct fdset *pfdset) 40 { 41 int i; 42 int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1); 43 44 for (i = 0; i < last_valid_idx; i++) { 45 if (pfdset->fd[i].fd != -1) 46 continue; 47 48 fdset_move(pfdset, i, last_valid_idx); 49 last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1); 50 } 51 pfdset->num = last_valid_idx + 1; 52 } 53 54 /* 55 * Find deleted fd entries and remove them 56 */ 57 static void 58 fdset_shrink(struct fdset *pfdset) 59 { 60 pthread_mutex_lock(&pfdset->fd_mutex); 61 fdset_shrink_nolock(pfdset); 62 pthread_mutex_unlock(&pfdset->fd_mutex); 63 } 64 65 /** 66 * Returns the index in the fdset for a given fd. 67 * @return 68 * index for the fd, or -1 if fd isn't in the fdset. 69 */ 70 static int 71 fdset_find_fd(struct fdset *pfdset, int fd) 72 { 73 int i; 74 75 for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++) 76 ; 77 78 return i == pfdset->num ? -1 : i; 79 } 80 81 static void 82 fdset_add_fd(struct fdset *pfdset, int idx, int fd, 83 fd_cb rcb, fd_cb wcb, void *dat) 84 { 85 struct fdentry *pfdentry = &pfdset->fd[idx]; 86 struct pollfd *pfd = &pfdset->rwfds[idx]; 87 88 pfdentry->fd = fd; 89 pfdentry->rcb = rcb; 90 pfdentry->wcb = wcb; 91 pfdentry->dat = dat; 92 93 pfd->fd = fd; 94 pfd->events = rcb ? POLLIN : 0; 95 pfd->events |= wcb ? POLLOUT : 0; 96 pfd->revents = 0; 97 } 98 99 void 100 fdset_init(struct fdset *pfdset) 101 { 102 int i; 103 104 if (pfdset == NULL) 105 return; 106 107 for (i = 0; i < MAX_FDS; i++) { 108 pfdset->fd[i].fd = -1; 109 pfdset->fd[i].dat = NULL; 110 } 111 pfdset->num = 0; 112 } 113 114 /** 115 * Register the fd in the fdset with read/write handler and context. 116 */ 117 int 118 fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) 119 { 120 int i; 121 122 if (pfdset == NULL || fd == -1) 123 return -1; 124 125 pthread_mutex_lock(&pfdset->fd_mutex); 126 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; 127 if (i == -1) { 128 pthread_mutex_lock(&pfdset->fd_pooling_mutex); 129 fdset_shrink_nolock(pfdset); 130 pthread_mutex_unlock(&pfdset->fd_pooling_mutex); 131 i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; 132 if (i == -1) { 133 pthread_mutex_unlock(&pfdset->fd_mutex); 134 return -2; 135 } 136 } 137 138 fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); 139 pthread_mutex_unlock(&pfdset->fd_mutex); 140 141 return 0; 142 } 143 144 /** 145 * Unregister the fd from the fdset. 146 * Returns context of a given fd or NULL. 147 */ 148 void * 149 fdset_del(struct fdset *pfdset, int fd) 150 { 151 int i; 152 void *dat = NULL; 153 154 if (pfdset == NULL || fd == -1) 155 return NULL; 156 157 do { 158 pthread_mutex_lock(&pfdset->fd_mutex); 159 160 i = fdset_find_fd(pfdset, fd); 161 if (i != -1 && pfdset->fd[i].busy == 0) { 162 /* busy indicates r/wcb is executing! */ 163 dat = pfdset->fd[i].dat; 164 pfdset->fd[i].fd = -1; 165 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; 166 pfdset->fd[i].dat = NULL; 167 i = -1; 168 } 169 pthread_mutex_unlock(&pfdset->fd_mutex); 170 } while (i != -1); 171 172 return dat; 173 } 174 175 /** 176 * Unregister the fd from the fdset. 177 * 178 * If parameters are invalid, return directly -2. 179 * And check whether fd is busy, if yes, return -1. 180 * Otherwise, try to delete the fd from fdset and 181 * return true. 182 */ 183 int 184 fdset_try_del(struct fdset *pfdset, int fd) 185 { 186 int i; 187 188 if (pfdset == NULL || fd == -1) 189 return -2; 190 191 pthread_mutex_lock(&pfdset->fd_mutex); 192 i = fdset_find_fd(pfdset, fd); 193 if (i != -1 && pfdset->fd[i].busy) { 194 pthread_mutex_unlock(&pfdset->fd_mutex); 195 return -1; 196 } 197 198 if (i != -1) { 199 pfdset->fd[i].fd = -1; 200 pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; 201 pfdset->fd[i].dat = NULL; 202 } 203 204 pthread_mutex_unlock(&pfdset->fd_mutex); 205 return 0; 206 } 207 208 /** 209 * This functions runs in infinite blocking loop until there is no fd in 210 * pfdset. It calls corresponding r/w handler if there is event on the fd. 211 * 212 * Before the callback is called, we set the flag to busy status; If other 213 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it 214 * will wait until the flag is reset to zero(which indicates the callback is 215 * finished), then it could free the context after fdset_del. 216 */ 217 uint32_t 218 fdset_event_dispatch(void *arg) 219 { 220 int i; 221 struct pollfd *pfd; 222 struct fdentry *pfdentry; 223 fd_cb rcb, wcb; 224 void *dat; 225 int fd, numfds; 226 int remove1, remove2; 227 int need_shrink; 228 struct fdset *pfdset = arg; 229 int val; 230 231 if (pfdset == NULL) 232 return 0; 233 234 while (1) { 235 236 /* 237 * When poll is blocked, other threads might unregister 238 * listenfds from and register new listenfds into fdset. 239 * When poll returns, the entries for listenfds in the fdset 240 * might have been updated. It is ok if there is unwanted call 241 * for new listenfds. 242 */ 243 pthread_mutex_lock(&pfdset->fd_mutex); 244 numfds = pfdset->num; 245 pthread_mutex_unlock(&pfdset->fd_mutex); 246 247 pthread_mutex_lock(&pfdset->fd_pooling_mutex); 248 val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */); 249 pthread_mutex_unlock(&pfdset->fd_pooling_mutex); 250 if (val < 0) 251 continue; 252 253 need_shrink = 0; 254 for (i = 0; i < numfds; i++) { 255 pthread_mutex_lock(&pfdset->fd_mutex); 256 257 pfdentry = &pfdset->fd[i]; 258 fd = pfdentry->fd; 259 pfd = &pfdset->rwfds[i]; 260 261 if (fd < 0) { 262 need_shrink = 1; 263 pthread_mutex_unlock(&pfdset->fd_mutex); 264 continue; 265 } 266 267 if (!pfd->revents) { 268 pthread_mutex_unlock(&pfdset->fd_mutex); 269 continue; 270 } 271 272 remove1 = remove2 = 0; 273 274 rcb = pfdentry->rcb; 275 wcb = pfdentry->wcb; 276 dat = pfdentry->dat; 277 pfdentry->busy = 1; 278 279 pthread_mutex_unlock(&pfdset->fd_mutex); 280 281 if (rcb && pfd->revents & (POLLIN | FDPOLLERR)) 282 rcb(fd, dat, &remove1); 283 if (wcb && pfd->revents & (POLLOUT | FDPOLLERR)) 284 wcb(fd, dat, &remove2); 285 pfdentry->busy = 0; 286 /* 287 * fdset_del needs to check busy flag. 288 * We don't allow fdset_del to be called in callback 289 * directly. 290 */ 291 /* 292 * When we are to clean up the fd from fdset, 293 * because the fd is closed in the cb, 294 * the old fd val could be reused by when creates new 295 * listen fd in another thread, we couldn't call 296 * fdset_del. 297 */ 298 if (remove1 || remove2) { 299 pfdentry->fd = -1; 300 need_shrink = 1; 301 } 302 } 303 304 if (need_shrink) 305 fdset_shrink(pfdset); 306 } 307 308 return 0; 309 } 310 311 static void 312 fdset_pipe_read_cb(int readfd, void *dat, 313 int *remove __rte_unused) 314 { 315 char charbuf[16]; 316 struct fdset *fdset = dat; 317 int r = read(readfd, charbuf, sizeof(charbuf)); 318 /* 319 * Just an optimization, we don't care if read() failed 320 * so ignore explicitly its return value to make the 321 * compiler happy 322 */ 323 RTE_SET_USED(r); 324 325 pthread_mutex_lock(&fdset->sync_mutex); 326 fdset->sync = true; 327 pthread_cond_broadcast(&fdset->sync_cond); 328 pthread_mutex_unlock(&fdset->sync_mutex); 329 } 330 331 void 332 fdset_pipe_uninit(struct fdset *fdset) 333 { 334 fdset_del(fdset, fdset->u.readfd); 335 close(fdset->u.readfd); 336 close(fdset->u.writefd); 337 } 338 339 int 340 fdset_pipe_init(struct fdset *fdset) 341 { 342 int ret; 343 344 if (pipe(fdset->u.pipefd) < 0) { 345 VHOST_FDMAN_LOG(ERR, 346 "failed to create pipe for vhost fdset"); 347 return -1; 348 } 349 350 ret = fdset_add(fdset, fdset->u.readfd, 351 fdset_pipe_read_cb, NULL, fdset); 352 353 if (ret < 0) { 354 VHOST_FDMAN_LOG(ERR, 355 "failed to add pipe readfd %d into vhost server fdset", 356 fdset->u.readfd); 357 358 fdset_pipe_uninit(fdset); 359 return -1; 360 } 361 362 return 0; 363 } 364 365 void 366 fdset_pipe_notify(struct fdset *fdset) 367 { 368 int r = write(fdset->u.writefd, "1", 1); 369 /* 370 * Just an optimization, we don't care if write() failed 371 * so ignore explicitly its return value to make the 372 * compiler happy 373 */ 374 RTE_SET_USED(r); 375 } 376 377 void 378 fdset_pipe_notify_sync(struct fdset *fdset) 379 { 380 pthread_mutex_lock(&fdset->sync_mutex); 381 382 fdset->sync = false; 383 fdset_pipe_notify(fdset); 384 385 while (!fdset->sync) 386 pthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex); 387 388 pthread_mutex_unlock(&fdset->sync_mutex); 389 } 390