/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/log.h"
#include "spdk/util.h"
#include "spdk/memory.h"
#include "spdk/cpuset.h"
#include "spdk/likely.h"
#include "spdk/vfu_target.h"

#include "tgt_internal.h"

struct tgt_pci_device_ops {
	struct spdk_vfu_endpoint_ops ops;
	TAILQ_ENTRY(tgt_pci_device_ops) link;
};

static struct spdk_cpuset g_tgt_core_mask;
static pthread_mutex_t g_endpoint_lock = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, spdk_vfu_endpoint) g_endpoint = TAILQ_HEAD_INITIALIZER(g_endpoint);
static TAILQ_HEAD(, tgt_pci_device_ops) g_pci_device_ops = TAILQ_HEAD_INITIALIZER(g_pci_device_ops);
static char g_endpoint_path_dirname[PATH_MAX] = "";
static uint32_t g_fini_endpoint_cnt = 0;
static spdk_vfu_fini_cb g_fini_cb = NULL;

static struct spdk_vfu_endpoint_ops *
tgt_get_pci_device_ops(const char *device_type_name)
{
	struct tgt_pci_device_ops *pci_ops, *tmp;
	bool exist = false;

	pthread_mutex_lock(&g_endpoint_lock);
	TAILQ_FOREACH_SAFE(pci_ops, &g_pci_device_ops, link, tmp) {
		if (!strncmp(device_type_name, pci_ops->ops.name, SPDK_VFU_MAX_NAME_LEN)) {
			exist = true;
			break;
		}
	}
	pthread_mutex_unlock(&g_endpoint_lock);

	if (exist) {
		return &pci_ops->ops;
	}
	return NULL;
}

int
spdk_vfu_register_endpoint_ops(struct spdk_vfu_endpoint_ops *ops)
{
	struct tgt_pci_device_ops *pci_ops;
	struct spdk_vfu_endpoint_ops *tmp;

	tmp = tgt_get_pci_device_ops(ops->name);
	if (tmp) {
		return -EEXIST;
	}

	pci_ops = calloc(1, sizeof(*pci_ops));
	if (!pci_ops) {
		return -ENOMEM;
	}
	pci_ops->ops = *ops;

	pthread_mutex_lock(&g_endpoint_lock);
	TAILQ_INSERT_TAIL(&g_pci_device_ops, pci_ops, link);
	pthread_mutex_unlock(&g_endpoint_lock);

	return 0;
}

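/*
 * A minimal registration sketch (the "my_dev" device type and callbacks are
 * hypothetical, not part of this file): a device module fills a
 * struct spdk_vfu_endpoint_ops with its callbacks and registers it once at
 * module load time, for example:
 *
 *	static struct spdk_vfu_endpoint_ops g_my_dev_ops = {
 *		.name = "my_dev",
 *		.init = my_dev_init,
 *		.get_device_info = my_dev_get_device_info,
 *		.get_vendor_capability = my_dev_get_vendor_capability,
 *		.attach_device = my_dev_attach,
 *		.detach_device = my_dev_detach,
 *		.destruct = my_dev_destruct,
 *	};
 *
 *	spdk_vfu_register_endpoint_ops(&g_my_dev_ops);
 *
 * spdk_vfu_create_endpoint() can then be called with "my_dev" as dev_type_name.
 */
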
static char *
tgt_get_base_path(void)
{
	return g_endpoint_path_dirname;
}

int
spdk_vfu_set_socket_path(const char *basename)
{
	int ret;

	if (basename && strlen(basename) > 0) {
		ret = snprintf(g_endpoint_path_dirname, sizeof(g_endpoint_path_dirname) - 2, "%s", basename);
		if (ret <= 0) {
			return -EINVAL;
		}
		if ((size_t)ret >= sizeof(g_endpoint_path_dirname) - 2) {
			SPDK_ERRLOG("Socket dir path length %d is too long\n", ret);
			return -EINVAL;
		}

		if (g_endpoint_path_dirname[ret - 1] != '/') {
			g_endpoint_path_dirname[ret] = '/';
			g_endpoint_path_dirname[ret + 1] = '\0';
		}
	}

	return 0;
}

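/*
 * The directory configured via spdk_vfu_set_socket_path() (or the process CWD
 * by default, see spdk_vfu_init()) is prepended to the endpoint name to form
 * the vfio-user socket path. For example (paths are illustrative only),
 * spdk_vfu_set_socket_path("/var/run/vfu") followed by
 * spdk_vfu_create_endpoint("vfu.0", ...) results in the listening socket
 * "/var/run/vfu/vfu.0".
 */
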
struct spdk_vfu_endpoint *
spdk_vfu_get_endpoint_by_name(const char *name)
{
	struct spdk_vfu_endpoint *endpoint, *tmp;
	bool exist = false;

	pthread_mutex_lock(&g_endpoint_lock);
	TAILQ_FOREACH_SAFE(endpoint, &g_endpoint, link, tmp) {
		if (!strncmp(name, endpoint->name, SPDK_VFU_MAX_NAME_LEN)) {
			exist = true;
			break;
		}
	}
	pthread_mutex_unlock(&g_endpoint_lock);

	if (exist) {
		return endpoint;
	}
	return NULL;
}

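/*
 * Per-endpoint poller that drives the libvfio-user context after a client has
 * attached: vfu_run_ctx() processes pending requests, EBUSY means there is
 * nothing to do, and ENOTCONN means the client disconnected, in which case the
 * device is detached and the poller unregisters itself.
 */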
static int
tgt_vfu_ctx_poller(void *ctx)
{
	struct spdk_vfu_endpoint *endpoint = ctx;
	vfu_ctx_t *vfu_ctx = endpoint->vfu_ctx;
	int ret;

	ret = vfu_run_ctx(vfu_ctx);
	if (spdk_unlikely(ret == -1)) {
		if (errno == EBUSY) {
			return SPDK_POLLER_IDLE;
		}

		if (errno == ENOTCONN) {
			spdk_poller_unregister(&endpoint->vfu_ctx_poller);
			if (endpoint->ops.detach_device) {
				endpoint->ops.detach_device(endpoint);
			}
			endpoint->is_attached = false;
			return SPDK_POLLER_BUSY;
		}
	}

	return ret != 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static int
tgt_accept_poller(void *ctx)
{
	struct spdk_vfu_endpoint *endpoint = ctx;
	int ret;

	if (endpoint->is_attached) {
		return SPDK_POLLER_IDLE;
	}

	ret = vfu_attach_ctx(endpoint->vfu_ctx);
	if (ret == 0) {
		ret = endpoint->ops.attach_device(endpoint);
		if (!ret) {
			SPDK_NOTICELOG("%s: attached successfully\n", spdk_vfu_get_endpoint_id(endpoint));
			/* Polling the socket too frequently will cause performance issues */
			endpoint->vfu_ctx_poller = SPDK_POLLER_REGISTER(tgt_vfu_ctx_poller, endpoint, 1000);
			endpoint->is_attached = true;
		}
		return SPDK_POLLER_BUSY;
	}

	if (errno == EAGAIN || errno == EWOULDBLOCK) {
		return SPDK_POLLER_IDLE;
	}

	return SPDK_POLLER_BUSY;
}

static void
tgt_log_cb(vfu_ctx_t *vfu_ctx, int level, char const *msg)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);

	if (level >= LOG_DEBUG) {
		SPDK_DEBUGLOG(vfu, "%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg);
	} else if (level >= LOG_INFO) {
		SPDK_INFOLOG(vfu, "%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg);
	} else if (level >= LOG_NOTICE) {
		SPDK_NOTICELOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg);
	} else if (level >= LOG_WARNING) {
		SPDK_WARNLOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg);
	} else {
		SPDK_ERRLOG("%s: %s\n", spdk_vfu_get_endpoint_id(endpoint), msg);
	}
}

static int
tgt_get_log_level(void)
{
	int level;

	if (SPDK_DEBUGLOG_FLAG_ENABLED("vfu")) {
		return LOG_DEBUG;
	}

	level = spdk_log_to_syslog_level(spdk_log_get_level());
	if (level < 0) {
		return LOG_ERR;
	}

	return level;
}

static void
init_pci_config_space(vfu_pci_config_space_t *p, uint16_t ipin)
{
	/* MLBAR */
	p->hdr.bars[0].raw = 0x0;
	/* MUBAR */
	p->hdr.bars[1].raw = 0x0;

	/* vendor specific, let's set them to zero for now */
	p->hdr.bars[3].raw = 0x0;
	p->hdr.bars[4].raw = 0x0;
	p->hdr.bars[5].raw = 0x0;

	/* enable INTx */
	p->hdr.intr.ipin = ipin;
}

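/*
 * DMA map/unmap callbacks invoked by libvfio-user when the client adds or
 * removes a guest memory region. Only regions that are both readable and
 * writable and whose mapping address/length are 2MB-aligned are registered
 * with the SPDK memory map; device modules may additionally react through the
 * optional post_memory_add/pre_memory_remove callbacks.
 */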
static void
tgt_memory_region_add_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
	void *map_start, *map_end;
	int ret;

	if (!info->vaddr) {
		return;
	}

	map_start = info->mapping.iov_base;
	map_end = info->mapping.iov_base + info->mapping.iov_len;

	if (((uintptr_t)info->mapping.iov_base & MASK_2MB) ||
	    (info->mapping.iov_len & MASK_2MB)) {
		SPDK_DEBUGLOG(vfu, "Invalid memory region vaddr %p, IOVA %p-%p\n",
			      info->vaddr, map_start, map_end);
		return;
	}

	if (info->prot == (PROT_WRITE | PROT_READ)) {
		ret = spdk_mem_register(info->mapping.iov_base, info->mapping.iov_len);
		if (ret) {
			SPDK_ERRLOG("Memory region register %p-%p failed, ret=%d\n",
				    map_start, map_end, ret);
		}
	}

	if (endpoint->ops.post_memory_add) {
		endpoint->ops.post_memory_add(endpoint, map_start, map_end);
	}
}

static void
tgt_memory_region_remove_cb(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
	void *map_start, *map_end;
	int ret = 0;

	if (!info->vaddr) {
		return;
	}

	map_start = info->mapping.iov_base;
	map_end = info->mapping.iov_base + info->mapping.iov_len;

	if (((uintptr_t)info->mapping.iov_base & MASK_2MB) ||
	    (info->mapping.iov_len & MASK_2MB)) {
		SPDK_DEBUGLOG(vfu, "Invalid memory region vaddr %p, IOVA %p-%p\n",
			      info->vaddr, map_start, map_end);
		return;
	}

	if (endpoint->ops.pre_memory_remove) {
		endpoint->ops.pre_memory_remove(endpoint, map_start, map_end);
	}

	if (info->prot == (PROT_WRITE | PROT_READ)) {
		ret = spdk_mem_unregister(info->mapping.iov_base, info->mapping.iov_len);
		if (ret) {
			SPDK_ERRLOG("Memory region unregister %p-%p failed, ret=%d\n",
				    map_start, map_end, ret);
		}
	}
}

static int
tgt_device_quiesce_cb(vfu_ctx_t *vfu_ctx)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);
	int ret;

	assert(endpoint->ops.quiesce_device);
	ret = endpoint->ops.quiesce_device(endpoint);
	if (ret) {
		errno = EBUSY;
		ret = -1;
	}

	return ret;
}

static int
tgt_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_type_t type)
{
	struct spdk_vfu_endpoint *endpoint = vfu_get_private(vfu_ctx);

	SPDK_DEBUGLOG(vfu, "Device reset type %u\n", type);

	assert(endpoint->ops.reset_device);
	return endpoint->ops.reset_device(endpoint);
}

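/*
 * Build the libvfio-user context for an endpoint: query the device module for
 * its PCI identity, capabilities and regions, create the vfu context on the
 * endpoint's UNIX socket, add vendor/PM/PCIe/MSI-X capabilities, set up BAR
 * regions with optional sparse mmaps, DMA/reset/quiesce callbacks and IRQs,
 * then realize the context and cache the config space and MSI-X pointers.
 */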
static int
tgt_endpoint_realize(struct spdk_vfu_endpoint *endpoint)
{
	int ret;
	uint8_t buf[512];
	struct vsc *vendor_cap;
	ssize_t cap_offset;
	uint16_t vendor_cap_idx, cap_size, sparse_mmap_idx;
	struct spdk_vfu_pci_device pci_dev;
	uint8_t region_idx;

	assert(endpoint->ops.get_device_info);
	ret = endpoint->ops.get_device_info(endpoint, &pci_dev);
	if (ret) {
		SPDK_ERRLOG("%s: failed to get pci device info\n", spdk_vfu_get_endpoint_id(endpoint));
		return ret;
	}

	endpoint->vfu_ctx = vfu_create_ctx(VFU_TRANS_SOCK, endpoint->uuid, LIBVFIO_USER_FLAG_ATTACH_NB,
					   endpoint, VFU_DEV_TYPE_PCI);
	if (endpoint->vfu_ctx == NULL) {
		SPDK_ERRLOG("%s: error creating libvfio-user context\n", spdk_vfu_get_endpoint_id(endpoint));
		return -EFAULT;
	}
	vfu_setup_log(endpoint->vfu_ctx, tgt_log_cb, tgt_get_log_level());

	ret = vfu_pci_init(endpoint->vfu_ctx, VFU_PCI_TYPE_EXPRESS, PCI_HEADER_TYPE_NORMAL, 0);
	if (ret < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to initialize PCI\n", endpoint->vfu_ctx);
		goto error;
	}

	vfu_pci_set_id(endpoint->vfu_ctx, pci_dev.id.vid, pci_dev.id.did, pci_dev.id.ssvid,
		       pci_dev.id.ssid);
	vfu_pci_set_class(endpoint->vfu_ctx, pci_dev.class.bcc, pci_dev.class.scc, pci_dev.class.pi);

	/* Add Vendor Capabilities */
	for (vendor_cap_idx = 0; vendor_cap_idx < pci_dev.nr_vendor_caps; vendor_cap_idx++) {
		memset(buf, 0, sizeof(buf));
		cap_size = endpoint->ops.get_vendor_capability(endpoint, buf, 256, vendor_cap_idx);
		if (cap_size) {
			vendor_cap = (struct vsc *)buf;
			assert(vendor_cap->hdr.id == PCI_CAP_ID_VNDR);
			assert(vendor_cap->size == cap_size);

			cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, vendor_cap);
			if (cap_offset < 0) {
				SPDK_ERRLOG("vfu_ctx %p failed to add vendor capability\n", endpoint->vfu_ctx);
				ret = -EFAULT;
				goto error;
			}
		}
	}

	/* Add Standard PCI Capabilities */
	cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.pmcap);
	if (cap_offset < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to add pmcap\n", endpoint->vfu_ctx);
		ret = -EFAULT;
		goto error;
	}
	SPDK_DEBUGLOG(vfu, "%s PM cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset);

	cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.pxcap);
	if (cap_offset < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to add pxcap\n", endpoint->vfu_ctx);
		ret = -EFAULT;
		goto error;
	}
	SPDK_DEBUGLOG(vfu, "%s PX cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset);

	cap_offset = vfu_pci_add_capability(endpoint->vfu_ctx, 0, 0, &pci_dev.msixcap);
	if (cap_offset < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to add msixcap\n", endpoint->vfu_ctx);
		ret = -EFAULT;
		goto error;
	}
	SPDK_DEBUGLOG(vfu, "%s MSIX cap_offset %ld\n", spdk_vfu_get_endpoint_id(endpoint), cap_offset);

	/* Setup PCI Regions */
	for (region_idx = 0; region_idx < VFU_PCI_DEV_NUM_REGIONS; region_idx++) {
		struct spdk_vfu_pci_region *region = &pci_dev.regions[region_idx];
		struct iovec sparse_mmap[SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS];
		if (!region->len) {
			continue;
		}

		if (region->nr_sparse_mmaps) {
			assert(region->nr_sparse_mmaps <= SPDK_VFU_MAXIMUM_SPARSE_MMAP_REGIONS);
			for (sparse_mmap_idx = 0; sparse_mmap_idx < region->nr_sparse_mmaps; sparse_mmap_idx++) {
				sparse_mmap[sparse_mmap_idx].iov_base = (void *)region->mmaps[sparse_mmap_idx].offset;
				sparse_mmap[sparse_mmap_idx].iov_len = region->mmaps[sparse_mmap_idx].len;
			}
		}

		ret = vfu_setup_region(endpoint->vfu_ctx, region_idx, region->len, region->access_cb, region->flags,
				       region->nr_sparse_mmaps ? sparse_mmap : NULL, region->nr_sparse_mmaps,
				       region->fd, region->offset);
		if (ret) {
			SPDK_ERRLOG("vfu_ctx %p failed to setup region %u\n", endpoint->vfu_ctx, region_idx);
			goto error;
		}
		SPDK_DEBUGLOG(vfu, "%s: region %u, len 0x%"PRIx64", callback %p, nr sparse mmaps %u, fd %d\n",
			      spdk_vfu_get_endpoint_id(endpoint), region_idx, region->len, region->access_cb,
			      region->nr_sparse_mmaps, region->fd);
	}

	ret = vfu_setup_device_dma(endpoint->vfu_ctx, tgt_memory_region_add_cb,
				   tgt_memory_region_remove_cb);
	if (ret < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to setup dma callback\n", endpoint->vfu_ctx);
		goto error;
	}

	if (endpoint->ops.reset_device) {
		ret = vfu_setup_device_reset_cb(endpoint->vfu_ctx, tgt_device_reset_cb);
		if (ret < 0) {
			SPDK_ERRLOG("vfu_ctx %p failed to setup reset callback\n", endpoint->vfu_ctx);
			goto error;
		}
	}

	if (endpoint->ops.quiesce_device) {
		vfu_setup_device_quiesce_cb(endpoint->vfu_ctx, tgt_device_quiesce_cb);
	}

	ret = vfu_setup_device_nr_irqs(endpoint->vfu_ctx, VFU_DEV_INTX_IRQ, pci_dev.nr_int_irqs);
	if (ret < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to setup INTX\n", endpoint->vfu_ctx);
		goto error;
	}

	ret = vfu_setup_device_nr_irqs(endpoint->vfu_ctx, VFU_DEV_MSIX_IRQ, pci_dev.nr_msix_irqs);
	if (ret < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to setup MSIX\n", endpoint->vfu_ctx);
		goto error;
	}

	ret = vfu_realize_ctx(endpoint->vfu_ctx);
	if (ret < 0) {
		SPDK_ERRLOG("vfu_ctx %p failed to realize\n", endpoint->vfu_ctx);
		goto error;
	}

	endpoint->pci_config_space = vfu_pci_get_config_space(endpoint->vfu_ctx);
	assert(endpoint->pci_config_space != NULL);
	init_pci_config_space(endpoint->pci_config_space, pci_dev.intr_ipin);

	assert(cap_offset != 0);
	endpoint->msix = (struct msixcap *)((uint8_t *)endpoint->pci_config_space + cap_offset);

	return 0;

error:
	if (endpoint->vfu_ctx) {
		vfu_destroy_ctx(endpoint->vfu_ctx);
	}
	return ret;
}

static int
vfu_parse_core_mask(const char *mask, struct spdk_cpuset *cpumask)
{
	int rc;
	struct spdk_cpuset negative_vfu_mask;

	if (cpumask == NULL) {
		return -1;
	}

	if (mask == NULL) {
		spdk_cpuset_copy(cpumask, &g_tgt_core_mask);
		return 0;
	}

	rc = spdk_cpuset_parse(cpumask, mask);
	if (rc < 0) {
		SPDK_ERRLOG("invalid cpumask %s\n", mask);
		return -1;
	}

	spdk_cpuset_copy(&negative_vfu_mask, &g_tgt_core_mask);
	spdk_cpuset_negate(&negative_vfu_mask);
	spdk_cpuset_and(&negative_vfu_mask, cpumask);

	if (spdk_cpuset_count(&negative_vfu_mask) != 0) {
		SPDK_ERRLOG("one of the selected cpus is outside of the core mask (=%s)\n",
			    spdk_cpuset_fmt(&g_tgt_core_mask));
		return -1;
	}

	spdk_cpuset_and(cpumask, &g_tgt_core_mask);

	if (spdk_cpuset_count(cpumask) == 0) {
		SPDK_ERRLOG("no cpu is selected from the core mask (=%s)\n",
			    spdk_cpuset_fmt(&g_tgt_core_mask));
		return -1;
	}

	return 0;
}

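/*
 * Example behaviour of vfu_parse_core_mask() above (values are illustrative):
 * with an application core mask of 0xF, a NULL mask yields 0xF, "0x3" yields
 * 0x3, and "0x30" fails because cpus 4-5 are outside of the core mask.
 */
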
static void
tgt_endpoint_start_thread(void *arg1)
{
	struct spdk_vfu_endpoint *endpoint = arg1;

	endpoint->accept_poller = SPDK_POLLER_REGISTER(tgt_accept_poller, endpoint, 1000);
	assert(endpoint->accept_poller != NULL);
}

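/*
 * Final stage of endpoint teardown, run on the endpoint's thread: keep
 * retrying the device module's destruct callback while it returns -EAGAIN,
 * then free the endpoint and exit the thread. If the teardown was triggered by
 * spdk_vfu_fini(), the registered fini callback is invoked once the last
 * endpoint is gone.
 */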
static void
tgt_endpoint_thread_try_exit(void *arg1)
{
	struct spdk_vfu_endpoint *endpoint = arg1;
	spdk_vfu_fini_cb fini_cb = NULL;
	int res;

	res = endpoint->ops.destruct(endpoint);
	if (res == -EAGAIN) {
		/* Let's retry */
		spdk_thread_send_msg(endpoint->thread, tgt_endpoint_thread_try_exit, endpoint);
		return;
	} else if (res) {
		/* We're ignoring this error for now as there is nothing we can do about it */
		SPDK_ERRLOG("Endpoint destruct failed with %d\n", res);
	}

	free(endpoint);

	pthread_mutex_lock(&g_endpoint_lock);
	if (g_fini_cb) { /* called due to spdk_vfu_fini() */
		g_fini_endpoint_cnt--;

		if (!g_fini_endpoint_cnt) {
			fini_cb = g_fini_cb;
			g_fini_cb = NULL;
		}
	}
	pthread_mutex_unlock(&g_endpoint_lock);

	if (fini_cb) {
		fini_cb();
	}

	spdk_thread_exit(spdk_get_thread());
}

static void
tgt_endpoint_thread_exit(void *arg1)
{
	struct spdk_vfu_endpoint *endpoint = arg1;

	spdk_poller_unregister(&endpoint->accept_poller);
	spdk_poller_unregister(&endpoint->vfu_ctx_poller);

	/* Ensure the attached device is stopped before destroying the vfu context */
	if (endpoint->ops.detach_device) {
		endpoint->ops.detach_device(endpoint);
	}

	if (endpoint->vfu_ctx) {
		vfu_destroy_ctx(endpoint->vfu_ctx);
	}

	tgt_endpoint_thread_try_exit(endpoint);
}

int
spdk_vfu_create_endpoint(const char *endpoint_name, const char *cpumask_str,
			 const char *dev_type_name)
{
	char *basename;
	char uuid[PATH_MAX] = "";
	struct spdk_cpuset cpumask = {};
	struct spdk_vfu_endpoint *endpoint;
	struct spdk_vfu_endpoint_ops *ops;
	int ret = 0;

	ret = vfu_parse_core_mask(cpumask_str, &cpumask);
	if (ret) {
		return ret;
	}

	if (strlen(endpoint_name) >= SPDK_VFU_MAX_NAME_LEN - 1) {
		return -ENAMETOOLONG;
	}

	if (spdk_vfu_get_endpoint_by_name(endpoint_name)) {
		SPDK_ERRLOG("%s already exists\n", endpoint_name);
		return -EEXIST;
	}

	/* Find supported PCI device type */
	ops = tgt_get_pci_device_ops(dev_type_name);
	if (!ops) {
		SPDK_ERRLOG("Requested device type %s isn't registered\n", dev_type_name);
		return -ENOTSUP;
	}

	basename = tgt_get_base_path();
	if (snprintf(uuid, sizeof(uuid), "%s%s", basename, endpoint_name) >= (int)sizeof(uuid)) {
		SPDK_ERRLOG("Resulting socket path for endpoint %s is too long: %s%s\n",
			    endpoint_name, basename, endpoint_name);
		return -EINVAL;
	}

	endpoint = calloc(1, sizeof(*endpoint));
	if (!endpoint) {
		return -ENOMEM;
	}

	endpoint->endpoint_ctx = ops->init(endpoint, basename, endpoint_name);
	if (!endpoint->endpoint_ctx) {
		free(endpoint);
		return -EINVAL;
	}
	endpoint->ops = *ops;
	snprintf(endpoint->name, SPDK_VFU_MAX_NAME_LEN, "%s", endpoint_name);
	snprintf(endpoint->uuid, sizeof(uuid), "%s", uuid);

	SPDK_DEBUGLOG(vfu, "Construct endpoint %s\n", endpoint_name);
	/* Endpoint realize */
	ret = tgt_endpoint_realize(endpoint);
	if (ret) {
		endpoint->ops.destruct(endpoint);
		free(endpoint);
		return ret;
	}

	endpoint->thread = spdk_thread_create(endpoint_name, &cpumask);
	if (!endpoint->thread) {
		endpoint->ops.destruct(endpoint);
		vfu_destroy_ctx(endpoint->vfu_ctx);
		free(endpoint);
		return -EFAULT;
	}

	ret = 0;
	pthread_mutex_lock(&g_endpoint_lock);
	if (!g_fini_cb) {
		TAILQ_INSERT_TAIL(&g_endpoint, endpoint, link);
	} else { /* spdk_vfu_fini has been called */
		ret = -EPERM;
	}
	pthread_mutex_unlock(&g_endpoint_lock);

	if (ret) {
		/* we're in the process of destruction, no new endpoint creation is allowed */
		spdk_thread_destroy(endpoint->thread);
		endpoint->ops.destruct(endpoint);
		vfu_destroy_ctx(endpoint->vfu_ctx);
		free(endpoint);
		return -EFAULT;
	}

	spdk_thread_send_msg(endpoint->thread, tgt_endpoint_start_thread, endpoint);

	return 0;
}

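/*
 * Typical call sequence (the "my_dev" type name, cpumask and path below are
 * illustrative only):
 *
 *	spdk_vfu_register_endpoint_ops(&g_my_dev_ops);
 *	spdk_vfu_set_socket_path("/var/run/vfu");
 *	spdk_vfu_create_endpoint("vfu.0", "0x3", "my_dev");
 *	...
 *	spdk_vfu_delete_endpoint("vfu.0");
 */
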
int
spdk_vfu_delete_endpoint(const char *endpoint_name)
{
	struct spdk_vfu_endpoint *endpoint;

	endpoint = spdk_vfu_get_endpoint_by_name(endpoint_name);
	if (!endpoint) {
		SPDK_ERRLOG("%s doesn't exist\n", endpoint_name);
		return -ENOENT;
	}

	SPDK_NOTICELOG("Destruct endpoint %s\n", endpoint_name);

	pthread_mutex_lock(&g_endpoint_lock);
	TAILQ_REMOVE(&g_endpoint, endpoint, link);
	pthread_mutex_unlock(&g_endpoint_lock);
	spdk_thread_send_msg(endpoint->thread, tgt_endpoint_thread_exit, endpoint);

	return 0;
}

const char *
spdk_vfu_get_endpoint_id(struct spdk_vfu_endpoint *endpoint)
{
	return endpoint->uuid;
}

const char *
spdk_vfu_get_endpoint_name(struct spdk_vfu_endpoint *endpoint)
{
	return endpoint->name;
}

vfu_ctx_t *
spdk_vfu_get_vfu_ctx(struct spdk_vfu_endpoint *endpoint)
{
	return endpoint->vfu_ctx;
}

void *
spdk_vfu_get_endpoint_private(struct spdk_vfu_endpoint *endpoint)
{
	return endpoint->endpoint_ctx;
}

bool
spdk_vfu_endpoint_msix_enabled(struct spdk_vfu_endpoint *endpoint)
{
	return endpoint->msix->mxc.mxe;
}

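/*
 * INTx is considered enabled when the Interrupt Disable bit (hdr.cmd.id) in
 * the PCI command register is clear.
 */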
bool
spdk_vfu_endpoint_intx_enabled(struct spdk_vfu_endpoint *endpoint)
{
	return !endpoint->pci_config_space->hdr.cmd.id;
}

void *
spdk_vfu_endpoint_get_pci_config(struct spdk_vfu_endpoint *endpoint)
{
	return (void *)endpoint->pci_config_space;
}

void
spdk_vfu_init(spdk_vfu_init_cb init_cb)
{
	uint32_t i;
	size_t len;

	if (g_endpoint_path_dirname[0] == '\0') {
		if (getcwd(g_endpoint_path_dirname, sizeof(g_endpoint_path_dirname) - 2) == NULL) {
			SPDK_ERRLOG("getcwd failed\n");
			return;
		}

		len = strlen(g_endpoint_path_dirname);
		if (g_endpoint_path_dirname[len - 1] != '/') {
			g_endpoint_path_dirname[len] = '/';
			g_endpoint_path_dirname[len + 1] = '\0';
		}
	}

	spdk_cpuset_zero(&g_tgt_core_mask);
	SPDK_ENV_FOREACH_CORE(i) {
		spdk_cpuset_set_cpu(&g_tgt_core_mask, i, true);
	}

	init_cb(0);
}

void *
spdk_vfu_map_one(struct spdk_vfu_endpoint *endpoint, uint64_t addr, uint64_t len, dma_sg_t *sg,
		 struct iovec *iov,
		 int prot)
{
	int ret;

	assert(endpoint != NULL);
	assert(endpoint->vfu_ctx != NULL);
	assert(sg != NULL);
	assert(iov != NULL);

	ret = vfu_addr_to_sgl(endpoint->vfu_ctx, (void *)(uintptr_t)addr, len, sg, 1, prot);
	if (ret < 0) {
		return NULL;
	}

	ret = vfu_sgl_get(endpoint->vfu_ctx, sg, iov, 1, 0);
	if (ret != 0) {
		return NULL;
	}

	assert(iov->iov_base != NULL);
	return iov->iov_base;
}

void
spdk_vfu_unmap_sg(struct spdk_vfu_endpoint *endpoint, dma_sg_t *sg, struct iovec *iov, int iovcnt)
{
	assert(endpoint != NULL);
	assert(endpoint->vfu_ctx != NULL);
	assert(sg != NULL);
	assert(iov != NULL);

	vfu_sgl_put(endpoint->vfu_ctx, sg, iov, iovcnt);
}

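/*
 * A map/unmap sketch (illustrative only): dma_sg_t is opaque, so callers are
 * expected to reserve dma_sg_size() bytes (a libvfio-user helper) per element,
 * e.g.:
 *
 *	dma_sg_t *sg = calloc(1, dma_sg_size());
 *	struct iovec iov;
 *	void *buf = spdk_vfu_map_one(endpoint, addr, length, sg, &iov, PROT_READ | PROT_WRITE);
 *	if (buf != NULL) {
 *		... access the guest buffer ...
 *		spdk_vfu_unmap_sg(endpoint, sg, &iov, 1);
 *	}
 *	free(sg);
 */
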
void
spdk_vfu_fini(spdk_vfu_fini_cb fini_cb)
{
	struct spdk_vfu_endpoint *endpoint, *tmp;
	struct tgt_pci_device_ops *ops, *ops_tmp;
	uint32_t endpoint_cnt = 0;

	pthread_mutex_lock(&g_endpoint_lock);
	assert(!g_fini_cb);
	TAILQ_FOREACH_SAFE(ops, &g_pci_device_ops, link, ops_tmp) {
		TAILQ_REMOVE(&g_pci_device_ops, ops, link);
		free(ops);
	}

	TAILQ_FOREACH_SAFE(endpoint, &g_endpoint, link, tmp) {
		TAILQ_REMOVE(&g_endpoint, endpoint, link);
		endpoint_cnt++;
		spdk_thread_send_msg(endpoint->thread, tgt_endpoint_thread_exit, endpoint);
	}

	/* NOTE: g_fini_cb and g_fini_endpoint_cnt are accessed under the same mutex so it's safe to assign them here */
	if (endpoint_cnt) {
		g_fini_endpoint_cnt = endpoint_cnt;
		g_fini_cb = fini_cb;
	}
	pthread_mutex_unlock(&g_endpoint_lock);

	if (!endpoint_cnt) {
		fini_cb();
	}
}
SPDK_LOG_REGISTER_COMPONENT(vfu)