/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>
#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_string_fns.h>
#include <rte_thread.h>

#include "fd_man.h"

RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);
#define RTE_LOGTYPE_VHOST_FDMAN vhost_fdset_logtype
#define VHOST_FDMAN_LOG(level, ...) \
	RTE_LOG_LINE(level, VHOST_FDMAN, "" __VA_ARGS__)

struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable. */
	void *dat;	/* fd context */
	int busy;	/* whether this entry is being used in cb. */
	LIST_ENTRY(fdentry) next;
};

struct fdset {
	char name[RTE_THREAD_NAME_SIZE];
	int epfd;
	struct fdentry fd[MAX_FDS];
	LIST_HEAD(, fdentry) fdlist;
	int next_free_idx;
	rte_thread_t tid;
	pthread_mutex_t fd_mutex;
	bool destroy;
};

#define MAX_FDSETS 8

static struct fdset *fdsets[MAX_FDSETS];
static pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;

static uint32_t fdset_event_dispatch(void *arg);

static struct fdset *
fdset_lookup(const char *name)
{
	int i;

	for (i = 0; i < MAX_FDSETS; i++) {
		struct fdset *fdset = fdsets[i];
		if (fdset == NULL)
			continue;

		if (!strncmp(fdset->name, name, RTE_THREAD_NAME_SIZE))
			return fdset;
	}

	return NULL;
}

static int
fdset_insert(struct fdset *fdset)
{
	int i;

	for (i = 0; i < MAX_FDSETS; i++) {
		if (fdsets[i] == NULL) {
			fdsets[i] = fdset;
			return 0;
		}
	}

	return -1;
}

struct fdset *
fdset_init(const char *name)
{
	struct fdset *fdset;
	uint32_t val;
	int i;

	pthread_mutex_lock(&fdsets_mutex);
	fdset = fdset_lookup(name);
	if (fdset) {
		pthread_mutex_unlock(&fdsets_mutex);
		return fdset;
	}

	fdset = rte_zmalloc(NULL, sizeof(*fdset), 0);
	if (!fdset) {
		VHOST_FDMAN_LOG(ERR, "failed to alloc fdset %s", name);
		goto err_unlock;
	}

	rte_strscpy(fdset->name, name, RTE_THREAD_NAME_SIZE);

	pthread_mutex_init(&fdset->fd_mutex, NULL);

	for (i = 0; i < (int)RTE_DIM(fdset->fd); i++) {
		fdset->fd[i].fd = -1;
		fdset->fd[i].dat = NULL;
	}
	LIST_INIT(&fdset->fdlist);

	/*
	 * Any non-zero value would work (see man epoll_create),
	 * but pass MAX_FDS for consistency.
	 */
	fdset->epfd = epoll_create(MAX_FDS);
	if (fdset->epfd < 0) {
		VHOST_FDMAN_LOG(ERR, "failed to create epoll for %s fdset", name);
		goto err_free;
	}

	if (rte_thread_create_internal_control(&fdset->tid, fdset->name,
			fdset_event_dispatch, fdset)) {
		VHOST_FDMAN_LOG(ERR, "Failed to create %s event dispatch thread",
			fdset->name);
		goto err_epoll;
	}

	if (fdset_insert(fdset)) {
		VHOST_FDMAN_LOG(ERR, "Failed to insert fdset %s", name);
		goto err_thread;
	}

	pthread_mutex_unlock(&fdsets_mutex);

	return fdset;

err_thread:
	fdset->destroy = true;
	rte_thread_join(fdset->tid, &val);
err_epoll:
	close(fdset->epfd);
err_free:
	rte_free(fdset);
err_unlock:
	pthread_mutex_unlock(&fdsets_mutex);

	return NULL;
}

static int
fdset_insert_entry(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	struct fdentry *pfdentry;

	if (pfdset->next_free_idx >= (int)RTE_DIM(pfdset->fd))
		return -1;

	pfdentry = &pfdset->fd[pfdset->next_free_idx];
	pfdentry->fd = fd;
	pfdentry->rcb = rcb;
	pfdentry->wcb = wcb;
	pfdentry->dat = dat;

	LIST_INSERT_HEAD(&pfdset->fdlist, pfdentry, next);

	/* Find next free slot */
	pfdset->next_free_idx++;
	for (; pfdset->next_free_idx < (int)RTE_DIM(pfdset->fd); pfdset->next_free_idx++) {
		if (pfdset->fd[pfdset->next_free_idx].fd != -1)
			continue;
		break;
	}

	return 0;
}

static void
fdset_remove_entry(struct fdset *pfdset, struct fdentry *pfdentry)
{
	int entry_idx;

	pfdentry->fd = -1;
	pfdentry->rcb = pfdentry->wcb = NULL;
	pfdentry->dat = NULL;

	entry_idx = pfdentry - pfdset->fd;
	if (entry_idx < pfdset->next_free_idx)
		pfdset->next_free_idx = entry_idx;

	LIST_REMOVE(pfdentry, next);
}

static struct fdentry *
fdset_find_entry_locked(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	LIST_FOREACH(pfdentry, &pfdset->fdlist, next) {
		if (pfdentry->fd != fd)
			continue;
		return pfdentry;
	}

	return NULL;
}

/**
 * Register the fd in the fdset with read/write handler and context.
 */
int
fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
	struct epoll_event ev;
	struct fdentry *pfdentry;
	int ret = 0;

	if (pfdset == NULL || fd == -1) {
		ret = -1;
		goto out;
	}

	pthread_mutex_lock(&pfdset->fd_mutex);
	ret = fdset_insert_entry(pfdset, fd, rcb, wcb, dat);
	if (ret < 0) {
		VHOST_FDMAN_LOG(ERR, "failed to insert fdset entry");
		pthread_mutex_unlock(&pfdset->fd_mutex);
		goto out;
	}
	pthread_mutex_unlock(&pfdset->fd_mutex);

	ev.events = EPOLLERR;
	ev.events |= rcb ? EPOLLIN : 0;
	ev.events |= wcb ? EPOLLOUT : 0;
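	/*
	 * Only the fd itself is stored in the epoll data: the dispatch
	 * thread looks the entry up again under fd_mutex when events are
	 * reported for it.
	 */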
	ev.data.fd = fd;

	ret = epoll_ctl(pfdset->epfd, EPOLL_CTL_ADD, fd, &ev);
	if (ret < 0) {
		VHOST_FDMAN_LOG(ERR, "could not add %d fd to %d epfd: %s",
			fd, pfdset->epfd, strerror(errno));
		goto out_remove;
	}

	return 0;
out_remove:
	pthread_mutex_lock(&pfdset->fd_mutex);
	pfdentry = fdset_find_entry_locked(pfdset, fd);
	if (pfdentry)
		fdset_remove_entry(pfdset, pfdentry);
	pthread_mutex_unlock(&pfdset->fd_mutex);
out:
	return ret;
}

static void
fdset_del_locked(struct fdset *pfdset, struct fdentry *pfdentry)
{
	if (epoll_ctl(pfdset->epfd, EPOLL_CTL_DEL, pfdentry->fd, NULL) == -1)
		VHOST_FDMAN_LOG(ERR, "could not remove %d fd from %d epfd: %s",
			pfdentry->fd, pfdset->epfd, strerror(errno));

	fdset_remove_entry(pfdset, pfdentry);
}

void
fdset_del(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	if (pfdset == NULL || fd == -1)
		return;

	do {
		pthread_mutex_lock(&pfdset->fd_mutex);
		pfdentry = fdset_find_entry_locked(pfdset, fd);
		if (pfdentry != NULL && pfdentry->busy == 0) {
			fdset_del_locked(pfdset, pfdentry);
			pfdentry = NULL;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
	} while (pfdentry != NULL);
}

/**
 * Unregister the fd from the fdset.
 *
 * Returns -2 if the parameters are invalid, -1 if the fd is currently
 * busy in a callback, and 0 once the fd has been deleted from the fdset.
 */
int
fdset_try_del(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;

	if (pfdset == NULL || fd == -1)
		return -2;

	pthread_mutex_lock(&pfdset->fd_mutex);
	pfdentry = fdset_find_entry_locked(pfdset, fd);
	if (pfdentry != NULL && pfdentry->busy != 0) {
		pthread_mutex_unlock(&pfdset->fd_mutex);
		return -1;
	}

	if (pfdentry != NULL)
		fdset_del_locked(pfdset, pfdentry);

	pthread_mutex_unlock(&pfdset->fd_mutex);
	return 0;
}
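
/*
 * Illustrative sketch, not part of this file: a caller whose callbacks take
 * the caller's own lock should use fdset_try_del() in a retry loop instead
 * of fdset_del(), dropping that lock between attempts so a callback that
 * currently holds the entry busy can finish. The lock and fd names below
 * are hypothetical.
 *
 *	pthread_mutex_lock(&owner_lock);
 *	while (fdset_try_del(set, fd) == -1) {
 *		pthread_mutex_unlock(&owner_lock);
 *		usleep(1000);
 *		pthread_mutex_lock(&owner_lock);
 *	}
 *	pthread_mutex_unlock(&owner_lock);
 */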

/**
 * This function runs in an infinite blocking loop until the fdset is
 * destroyed. It calls the corresponding read/write handler when there is
 * an event on an fd.
 *
 * Before a callback is invoked, the entry's busy flag is set; if another
 * thread (currently rte_vhost_driver_unregister) calls fdset_del
 * concurrently, it waits until the flag is reset to zero (which indicates
 * the callback has finished) and only then frees the context after
 * fdset_del.
 */
static uint32_t
fdset_event_dispatch(void *arg)
{
	int i;
	fd_cb rcb, wcb;
	void *dat;
	int fd, numfds;
	int remove1, remove2;
	struct fdset *pfdset = arg;

	if (pfdset == NULL)
		return 0;

	while (1) {
		struct epoll_event events[MAX_FDS];
		struct fdentry *pfdentry;

		numfds = epoll_wait(pfdset->epfd, events, RTE_DIM(events), 1000);
		if (numfds < 0)
			continue;

		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			fd = events[i].data.fd;
			pfdentry = fdset_find_entry_locked(pfdset, fd);
			if (pfdentry == NULL) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			pthread_mutex_unlock(&pfdset->fd_mutex);

			if (rcb && events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP))
				rcb(fd, dat, &remove1);
			if (wcb && events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
			 * fdset_del needs to check the busy flag.
			 * We don't allow fdset_del to be called in a callback
			 * directly.
			 */
			/*
			 * A concurrent fdset_del may have been waiting for the
			 * fdentry not to be busy, so we can't call
			 * fdset_del_locked().
			 */
			if (remove1 || remove2)
				fdset_del(pfdset, fd);
		}

		if (pfdset->destroy)
			break;
	}

	return 0;
}
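
/*
 * Illustrative usage sketch, not part of this file. It assumes the fd_cb
 * callback signature declared in fd_man.h, i.e. taking the fd, the context
 * pointer and a remove flag; the callback, fd and context names below are
 * hypothetical.
 *
 *	static void my_read_cb(int fd, void *dat, int *remove)
 *	{
 *		char buf[64];
 *
 *		if (read(fd, buf, sizeof(buf)) <= 0)
 *			*remove = 1;	// ask the dispatch thread to drop this fd
 *	}
 *
 *	struct fdset *set = fdset_init("my-fdset");
 *	if (set == NULL || fdset_add(set, my_fd, my_read_cb, NULL, my_ctx) < 0)
 *		return -1;	// handle error
 *	...
 *	fdset_del(set, my_fd);	// waits until no callback is using the fd
 */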