/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_thread.h>

#include "fd_man.h"

RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
#define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype
#define VHOST_FDMAN_LOG(level, ...) \
	RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)

struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable. */
	void *dat;	/* fd context */
	int busy;	/* whether this entry is being used in cb. */
	LIST_ENTRY(fdentry) next;
};

struct fdset {
	char name[RTE_THREAD_NAME_SIZE];
	int epfd;
	struct fdentry fd[MAX_FDS];
	LIST_HEAD(, fdentry) fdlist;
	int next_free_idx;
	rte_thread_t tid;
	pthread_mutex_t fd_mutex;
	bool destroy;
};

#define MAX_FDSETS 8

static struct fdset *fdsets[MAX_FDSETS];
static pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;

static uint32_t fdset_event_dispatch(void *arg);

static struct fdset *
fdset_lookup(const char *name)
{
	int i;

	for (i = 0; i < MAX_FDSETS; i++) {
		struct fdset *fdset = fdsets[i];
		if (fdset == NULL)
			continue;

		if (!strncmp(fdset->name, name, RTE_THREAD_NAME_SIZE))
			return fdset;
	}

	return NULL;
}

static int
fdset_insert(struct fdset *fdset)
{
	int i;

	for (i = 0; i < MAX_FDSETS; i++) {
		if (fdsets[i] == NULL) {
			fdsets[i] = fdset;
			return 0;
		}
	}

	return -1;
}
struct fdset *
fdset_init(const char *name)
{
	struct fdset *fdset;
	uint32_t val;
	int i;

	pthread_mutex_lock(&fdsets_mutex);
	fdset = fdset_lookup(name);
	if (fdset) {
		pthread_mutex_unlock(&fdsets_mutex);
		return fdset;
	}

	fdset = rte_zmalloc(NULL, sizeof(*fdset), 0);
	if (!fdset) {
		VHOST_FDMAN_LOG(ERR, "failed to alloc fdset %s", name);
		goto err_unlock;
	}

	rte_strscpy(fdset->name, name, RTE_THREAD_NAME_SIZE);

	pthread_mutex_init(&fdset->fd_mutex, NULL);

	for (i = 0; i < (int)RTE_DIM(fdset->fd); i++) {
		fdset->fd[i].fd = -1;
		fdset->fd[i].dat = NULL;
	}
	LIST_INIT(&fdset->fdlist);

	/*
	 * Any non-zero value would work (see man epoll_create),
	 * but pass MAX_FDS for consistency.
	 */
	fdset->epfd = epoll_create(MAX_FDS);
	if (fdset->epfd < 0) {
		VHOST_FDMAN_LOG(ERR, "failed to create epoll for %s fdset", name);
		goto err_free;
	}

	if (rte_thread_create_internal_control(&fdset->tid, fdset->name,
			fdset_event_dispatch, fdset)) {
		VHOST_FDMAN_LOG(ERR, "Failed to create %s event dispatch thread",
				fdset->name);
		goto err_epoll;
	}

	if (fdset_insert(fdset)) {
		VHOST_FDMAN_LOG(ERR, "Failed to insert fdset %s", name);
		goto err_thread;
	}

	pthread_mutex_unlock(&fdsets_mutex);

	return fdset;

err_thread:
	fdset->destroy = true;
	rte_thread_join(fdset->tid, &val);
err_epoll:
	close(fdset->epfd);
err_free:
	rte_free(fdset);
err_unlock:
	pthread_mutex_unlock(&fdsets_mutex);

	return NULL;
}

static int
fdset_insert_entry(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	struct fdentry *pfdentry;

	if (pfdset->next_free_idx >= (int)RTE_DIM(pfdset->fd))
		return -1;

	pfdentry = &pfdset->fd[pfdset->next_free_idx];
	pfdentry->fd = fd;
	pfdentry->rcb = rcb;
	pfdentry->wcb = wcb;
	pfdentry->dat = dat;

	LIST_INSERT_HEAD(&pfdset->fdlist, pfdentry, next);

	/* Find next free slot */
	pfdset->next_free_idx++;
	for (; pfdset->next_free_idx < (int)RTE_DIM(pfdset->fd); pfdset->next_free_idx++) {
		if (pfdset->fd[pfdset->next_free_idx].fd != -1)
			continue;
		break;
	}

	return 0;
}

static void
fdset_remove_entry(struct fdset *pfdset, struct fdentry *pfdentry)
{
	int entry_idx;

	pfdentry->fd = -1;
	pfdentry->rcb = pfdentry->wcb = NULL;
	pfdentry->dat = NULL;

	entry_idx = pfdentry - pfdset->fd;
	if (entry_idx < pfdset->next_free_idx)
		pfdset->next_free_idx = entry_idx;

	LIST_REMOVE(pfdentry, next);
}

static struct fdentry *
fdset_find_entry_locked(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	LIST_FOREACH(pfdentry, &pfdset->fdlist, next) {
		if (pfdentry->fd != fd)
			continue;
		return pfdentry;
	}

	return NULL;
}
/**
 * Register the fd in the fdset with read/write handler and context.
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	struct epoll_event ev;
	struct fdentry *pfdentry;
	int ret = 0;

	if (pfdset == NULL || fd == -1) {
		ret = -1;
		goto out;
	}

	pthread_mutex_lock(&pfdset->fd_mutex);
	ret = fdset_insert_entry(pfdset, fd, rcb, wcb, dat);
	if (ret < 0) {
		VHOST_FDMAN_LOG(ERR, "failed to insert fdset entry");
		pthread_mutex_unlock(&pfdset->fd_mutex);
		goto out;
	}
	pthread_mutex_unlock(&pfdset->fd_mutex);

	ev.events = EPOLLERR;
	ev.events |= rcb ? EPOLLIN : 0;
	ev.events |= wcb ? EPOLLOUT : 0;
	ev.data.fd = fd;

	ret = epoll_ctl(pfdset->epfd, EPOLL_CTL_ADD, fd, &ev);
	if (ret < 0) {
		VHOST_FDMAN_LOG(ERR, "could not add %d fd to %d epfd: %s",
			fd, pfdset->epfd, strerror(errno));
		goto out_remove;
	}

	return 0;
out_remove:
	pthread_mutex_lock(&pfdset->fd_mutex);
	pfdentry = fdset_find_entry_locked(pfdset, fd);
	if (pfdentry)
		fdset_remove_entry(pfdset, pfdentry);
	pthread_mutex_unlock(&pfdset->fd_mutex);
out:
	return ret;
}

static void
fdset_del_locked(struct fdset *pfdset, struct fdentry *pfdentry)
{
	if (epoll_ctl(pfdset->epfd, EPOLL_CTL_DEL, pfdentry->fd, NULL) == -1) {
		if (errno == EBADF) /* File might have already been closed. */
			VHOST_FDMAN_LOG(DEBUG, "could not remove %d fd from %d epfd: %s",
				pfdentry->fd, pfdset->epfd, strerror(errno));
		else
			VHOST_FDMAN_LOG(ERR, "could not remove %d fd from %d epfd: %s",
				pfdentry->fd, pfdset->epfd, strerror(errno));
	}

	fdset_remove_entry(pfdset, pfdentry);
}

void
fdset_del(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	if (pfdset == NULL || fd == -1)
		return;

	do {
		pthread_mutex_lock(&pfdset->fd_mutex);
		pfdentry = fdset_find_entry_locked(pfdset, fd);
		if (pfdentry != NULL && pfdentry->busy == 0) {
			fdset_del_locked(pfdset, pfdentry);
			pfdentry = NULL;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
	} while (pfdentry != NULL);
}

/**
 * Unregister the fd from the fdset.
 *
 * Return -2 if the parameters are invalid.
 * Return -1 if the fd is busy (a callback is currently running on it).
 * Otherwise, delete the fd from the fdset and return 0.
 */
int
fdset_try_del(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	if (pfdset == NULL || fd == -1)
		return -2;

	pthread_mutex_lock(&pfdset->fd_mutex);
	pfdentry = fdset_find_entry_locked(pfdset, fd);
	if (pfdentry != NULL && pfdentry->busy != 0) {
		pthread_mutex_unlock(&pfdset->fd_mutex);
		return -1;
	}

	if (pfdentry != NULL)
		fdset_del_locked(pfdset, pfdentry);

	pthread_mutex_unlock(&pfdset->fd_mutex);
	return 0;
}
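/*
 * Illustrative sketch only, not used by this file: how a caller might
 * register an fd with this API. The names example_read_cb, example_register
 * and "example-evt" are hypothetical. The callback signature and the *remove
 * out-parameter contract follow fdset_event_dispatch() below, which defers
 * the actual fdset_del() until after the callback has returned.
 */
static __rte_unused void
example_read_cb(int fd, void *dat, int *remove)
{
	char buf[64];
	ssize_t len;

	RTE_SET_USED(dat);

	/* Drain the fd; on EOF or error, ask the dispatch loop to drop it. */
	len = read(fd, buf, sizeof(buf));
	if (len <= 0)
		*remove = 1;
}

static __rte_unused int
example_register(int fd, void *ctx)
{
	struct fdset *fdset;

	/* Returns the existing fdset if one with this name was already created. */
	fdset = fdset_init("example-evt");
	if (fdset == NULL)
		return -1;

	/* Read-side callback only; the dispatch thread polls EPOLLIN for us. */
	return fdset_add(fdset, fd, example_read_cb, NULL, ctx);
}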
/**
 * This function runs in an infinite blocking loop until the fdset is
 * destroyed. It calls the corresponding r/w handler when there is an event
 * on the fd.
 *
 * Before a callback is invoked, the entry's busy flag is set. If another
 * thread (currently rte_vhost_driver_unregister) calls fdset_del
 * concurrently, it waits until the flag is reset to zero (which indicates
 * the callback has finished); only then can it free the context after
 * fdset_del.
 */
static uint32_t
fdset_event_dispatch(void *arg)
{
	int i;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;
	struct fdset *pfdset = arg;

	if (pfdset == NULL)
		return 0;

	while (1) {
		struct epoll_event events[MAX_FDS];
		struct fdentry *pfdentry;

		numfds = epoll_wait(pfdset->epfd, events, RTE_DIM(events), 1000);
		if (numfds < 0)
			continue;

		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			fd = events[i].data.fd;
			pfdentry = fdset_find_entry_locked(pfdset, fd);
			if (pfdentry == NULL) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			pthread_mutex_unlock(&pfdset->fd_mutex);

			if (rcb && events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP))
				rcb(fd, dat, &remove1);
			if (wcb && events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check the busy flag, so fdset_del
			 * must not be called directly from a callback.
			 */
			/*
			 * A concurrent fdset_del may have been waiting for the
			 * fdentry not to be busy, so we can't call
			 * fdset_del_locked().
			 */
			if (remove1 || remove2)
				fdset_del(pfdset, fd);
		}

		if (pfdset->destroy)
			break;
	}

	return 0;
}
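/*
 * Illustrative sketch of the unregistration side, mirroring the busy-flag
 * protocol described above. example_unregister is hypothetical, and the
 * rte_free() assumes the context was allocated with rte_malloc(); adjust to
 * however the real caller owns its context.
 */
static __rte_unused void
example_unregister(struct fdset *fdset, int fd, void *ctx)
{
	/*
	 * fdset_del() blocks until no callback is running on fd (busy flag
	 * is zero), so the dispatch thread cannot reference ctx afterwards.
	 * Callers that must not block can use fdset_try_del() and retry
	 * when it returns -1.
	 */
	fdset_del(fdset, fd);

	rte_free(ctx);
	close(fd);
}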