/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2021 NVIDIA Corporation & Affiliates
 */

#include <rte_eal.h>
#include <rte_tailq.h>
#include <rte_rwlock.h>
#include <rte_string_fns.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_log.h>

#include "rte_gpudev.h"
#include "gpudev_driver.h"

/* Logging */
RTE_LOG_REGISTER_DEFAULT(gpu_logtype, NOTICE);
#define GPU_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, gpu_logtype, RTE_FMT("gpu: " \
		RTE_FMT_HEAD(__VA_ARGS__, ) "\n", RTE_FMT_TAIL(__VA_ARGS__, )))

/* Set any driver error as EPERM */
#define GPU_DRV_RET(function) \
	((function != 0) ? -(rte_errno = EPERM) : (rte_errno = 0))

/* Array of devices */
static struct rte_gpu *gpus;
/* Size of the devices array (maximum number of devices) */
static int16_t gpu_max;
/* Number of currently valid devices */
static int16_t gpu_count;

/* Shared memory between processes. */
static const char *GPU_MEMZONE = "rte_gpu_shared";
static struct {
	__extension__ struct rte_gpu_mpshared gpus[0];
} *gpu_shared_mem;

/* Event callback object */
struct rte_gpu_callback {
	TAILQ_ENTRY(rte_gpu_callback) next;
	rte_gpu_callback_t *function;
	void *user_data;
	enum rte_gpu_event event;
};
static rte_rwlock_t gpu_callback_lock = RTE_RWLOCK_INITIALIZER;
static void gpu_free_callbacks(struct rte_gpu *dev);

int
rte_gpu_init(size_t dev_max)
{
	if (dev_max == 0 || dev_max > INT16_MAX) {
		GPU_LOG(ERR, "invalid array size");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	/* No lock, it must be called before or during first probing. */
	if (gpus != NULL) {
		GPU_LOG(ERR, "already initialized");
		rte_errno = EBUSY;
		return -rte_errno;
	}

	gpus = calloc(dev_max, sizeof(struct rte_gpu));
	if (gpus == NULL) {
		GPU_LOG(ERR, "cannot initialize library");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_max = dev_max;
	return 0;
}

uint16_t
rte_gpu_count_avail(void)
{
	return gpu_count;
}

bool
rte_gpu_is_valid(int16_t dev_id)
{
	if (dev_id >= 0 && dev_id < gpu_max &&
		gpus[dev_id].process_state == RTE_GPU_STATE_INITIALIZED)
		return true;
	return false;
}

static bool
gpu_match_parent(int16_t dev_id, int16_t parent)
{
	if (parent == RTE_GPU_ID_ANY)
		return true;
	return gpus[dev_id].mpshared->info.parent == parent;
}

int16_t
rte_gpu_find_next(int16_t dev_id, int16_t parent)
{
	if (dev_id < 0)
		dev_id = 0;
	while (dev_id < gpu_max &&
			(gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED ||
			!gpu_match_parent(dev_id, parent)))
		dev_id++;

	if (dev_id >= gpu_max)
		return RTE_GPU_ID_NONE;
	return dev_id;
}

static int16_t
gpu_find_free_id(void)
{
	int16_t dev_id;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		if (gpus[dev_id].process_state == RTE_GPU_STATE_UNUSED)
			return dev_id;
	}
	return RTE_GPU_ID_NONE;
}

static struct rte_gpu *
gpu_get_by_id(int16_t dev_id)
{
	if (!rte_gpu_is_valid(dev_id))
		return NULL;
	return &gpus[dev_id];
}

struct rte_gpu *
rte_gpu_get_by_name(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (name == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	RTE_GPU_FOREACH(dev_id) {
		dev = &gpus[dev_id];
		if (strncmp(name, dev->mpshared->name, RTE_DEV_NAME_MAX_LEN) == 0)
			return dev;
	}
	return NULL;
}

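/*
 * Reserve (primary process) or look up (secondary process) the memzone
 * holding the per-device data shared across processes.
 */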
static int
gpu_shared_mem_init(void)
{
	const struct rte_memzone *memzone;

	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
		memzone = rte_memzone_reserve(GPU_MEMZONE,
				sizeof(*gpu_shared_mem) +
				sizeof(*gpu_shared_mem->gpus) * gpu_max,
				SOCKET_ID_ANY, 0);
	} else {
		memzone = rte_memzone_lookup(GPU_MEMZONE);
	}
	if (memzone == NULL) {
		GPU_LOG(ERR, "cannot initialize shared memory");
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	gpu_shared_mem = memzone->addr;
	return 0;
}

struct rte_gpu *
rte_gpu_allocate(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		GPU_LOG(ERR, "only primary process can allocate device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "allocate device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	if (rte_gpu_get_by_name(name) != NULL) {
		GPU_LOG(ERR, "device with name %s already exists", name);
		rte_errno = EEXIST;
		return NULL;
	}
	dev_id = gpu_find_free_id();
	if (dev_id == RTE_GPU_ID_NONE) {
		GPU_LOG(ERR, "reached maximum number of devices");
		rte_errno = ENOENT;
		return NULL;
	}

	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	dev->mpshared = &gpu_shared_mem->gpus[dev_id];
	memset(dev->mpshared, 0, sizeof(*dev->mpshared));

	if (rte_strscpy(dev->mpshared->name, name, RTE_DEV_NAME_MAX_LEN) < 0) {
		GPU_LOG(ERR, "device name too long: %s", name);
		rte_errno = ENAMETOOLONG;
		return NULL;
	}
	dev->mpshared->info.name = dev->mpshared->name;
	dev->mpshared->info.dev_id = dev_id;
	dev->mpshared->info.numa_node = -1;
	dev->mpshared->info.parent = RTE_GPU_ID_NONE;
	TAILQ_INIT(&dev->callbacks);
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "new device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}

struct rte_gpu *
rte_gpu_attach(const char *name)
{
	int16_t dev_id;
	struct rte_gpu *dev;
	struct rte_gpu_mpshared *shared_dev;

	if (rte_eal_process_type() != RTE_PROC_SECONDARY) {
		GPU_LOG(ERR, "only secondary process can attach device");
		rte_errno = EPERM;
		return NULL;
	}
	if (name == NULL) {
		GPU_LOG(ERR, "attach device without a name");
		rte_errno = EINVAL;
		return NULL;
	}

	/* implicit initialization of library before adding first device */
	if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
		return NULL;

	/* initialize shared memory before adding first device */
	if (gpu_shared_mem == NULL && gpu_shared_mem_init() < 0)
		return NULL;

	for (dev_id = 0; dev_id < gpu_max; dev_id++) {
		shared_dev = &gpu_shared_mem->gpus[dev_id];
		if (strncmp(name, shared_dev->name, RTE_DEV_NAME_MAX_LEN) == 0)
			break;
	}
	if (dev_id >= gpu_max) {
		GPU_LOG(ERR, "device with name %s not found", name);
		rte_errno = ENOENT;
		return NULL;
	}
	dev = &gpus[dev_id];
	memset(dev, 0, sizeof(*dev));

	TAILQ_INIT(&dev->callbacks);
	dev->mpshared = shared_dev;
	__atomic_fetch_add(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);

	gpu_count++;
	GPU_LOG(DEBUG, "attached device %s (id %d) of total %d",
			name, dev_id, gpu_count);
	return dev;
}

int16_t
rte_gpu_add_child(const char *name, int16_t parent, uint64_t child_context)
{
	struct rte_gpu *dev;

	if (!rte_gpu_is_valid(parent)) {
		GPU_LOG(ERR, "add child to invalid parent ID %d", parent);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	dev = rte_gpu_allocate(name);
	if (dev == NULL)
		return -rte_errno;

	dev->mpshared->info.parent = parent;
	dev->mpshared->info.context = child_context;

	rte_gpu_complete_new(dev);
	return dev->mpshared->info.dev_id;
}

void
rte_gpu_complete_new(struct rte_gpu *dev)
{
	if (dev == NULL)
		return;

	dev->process_state = RTE_GPU_STATE_INITIALIZED;
	rte_gpu_notify(dev, RTE_GPU_EVENT_NEW);
}

int
rte_gpu_release(struct rte_gpu *dev)
{
	int16_t dev_id, child;

	if (dev == NULL) {
		rte_errno = ENODEV;
		return -rte_errno;
	}
	dev_id = dev->mpshared->info.dev_id;
	RTE_GPU_FOREACH_CHILD(child, dev_id) {
		GPU_LOG(ERR, "cannot release device %d with child %d",
				dev_id, child);
		rte_errno = EBUSY;
		return -rte_errno;
	}

	GPU_LOG(DEBUG, "free device %s (id %d)",
			dev->mpshared->info.name, dev->mpshared->info.dev_id);
	rte_gpu_notify(dev, RTE_GPU_EVENT_DEL);

	gpu_free_callbacks(dev);
	dev->process_state = RTE_GPU_STATE_UNUSED;
	__atomic_fetch_sub(&dev->mpshared->process_refcnt, 1, __ATOMIC_RELAXED);
	gpu_count--;

	return 0;
}

int
rte_gpu_close(int16_t dev_id)
{
	int firsterr, binerr;
	int *lasterr = &firsterr;
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "close invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

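	/*
	 * Keep the first error: once it is set, later failures only
	 * overwrite the scratch variable.
	 */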
	if (dev->ops.dev_close != NULL) {
		*lasterr = GPU_DRV_RET(dev->ops.dev_close(dev));
		if (*lasterr != 0)
			lasterr = &binerr;
	}

	*lasterr = rte_gpu_release(dev);

	rte_errno = -firsterr;
	return firsterr;
}

int
rte_gpu_callback_register(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "register callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot register callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

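	/* RTE_GPU_ID_ANY registers the callback on every device slot up to gpu_max. */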
	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;

		/* check if not already registered */
		TAILQ_FOREACH(callback, callbacks, next) {
			if (callback->event == event &&
					callback->function == function &&
					callback->user_data == user_data) {
				GPU_LOG(INFO, "callback already registered");
				rte_rwlock_write_unlock(&gpu_callback_lock);
				return 0;
			}
		}

		callback = malloc(sizeof(*callback));
		if (callback == NULL) {
			GPU_LOG(ERR, "cannot allocate callback");
			rte_rwlock_write_unlock(&gpu_callback_lock);
			return -ENOMEM;
		}
		callback->function = function;
		callback->user_data = user_data;
		callback->event = event;
		TAILQ_INSERT_TAIL(callbacks, callback, next);

	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}

int
rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
		rte_gpu_callback_t *function, void *user_data)
{
	int16_t next_dev, last_dev;
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	if (!rte_gpu_is_valid(dev_id) && dev_id != RTE_GPU_ID_ANY) {
		GPU_LOG(ERR, "unregister callback of invalid ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (function == NULL) {
		GPU_LOG(ERR, "cannot unregister callback without function");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev_id == RTE_GPU_ID_ANY) {
		next_dev = 0;
		last_dev = gpu_max - 1;
	} else {
		next_dev = last_dev = dev_id;
	}

	rte_rwlock_write_lock(&gpu_callback_lock);
	do {
		callbacks = &gpus[next_dev].callbacks;
		RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
			if (callback->event != event ||
					callback->function != function ||
					(callback->user_data != user_data &&
					user_data != (void *)-1))
				continue;
			TAILQ_REMOVE(callbacks, callback, next);
			free(callback);
		}
	} while (++next_dev <= last_dev);
	rte_rwlock_write_unlock(&gpu_callback_lock);

	return 0;
}

static void
gpu_free_callbacks(struct rte_gpu *dev)
{
	struct rte_gpu_callback_list *callbacks;
	struct rte_gpu_callback *callback, *nextcb;

	callbacks = &dev->callbacks;
	rte_rwlock_write_lock(&gpu_callback_lock);
	RTE_TAILQ_FOREACH_SAFE(callback, callbacks, next, nextcb) {
		TAILQ_REMOVE(callbacks, callback, next);
		free(callback);
	}
	rte_rwlock_write_unlock(&gpu_callback_lock);
}

void
rte_gpu_notify(struct rte_gpu *dev, enum rte_gpu_event event)
{
	int16_t dev_id;
	struct rte_gpu_callback *callback;

	dev_id = dev->mpshared->info.dev_id;
	rte_rwlock_read_lock(&gpu_callback_lock);
	TAILQ_FOREACH(callback, &dev->callbacks, next) {
		if (callback->event != event || callback->function == NULL)
			continue;
		callback->function(dev_id, event, callback->user_data);
	}
	rte_rwlock_read_unlock(&gpu_callback_lock);
}

int
rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "query invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (info == NULL) {
		GPU_LOG(ERR, "query without storage");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (dev->ops.dev_info_get == NULL) {
		*info = dev->mpshared->info;
		return 0;
	}
	return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
}

void *
rte_gpu_mem_alloc(int16_t dev_id, size_t size, unsigned int align)
{
	struct rte_gpu *dev;
	void *ptr;
	int ret;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	if (dev->ops.mem_alloc == NULL) {
		GPU_LOG(ERR, "mem allocation not supported");
		rte_errno = ENOTSUP;
		return NULL;
	}

	if (size == 0) /* dry-run */
		return NULL;

	if (align && !rte_is_power_of_2(align)) {
		GPU_LOG(ERR, "requested alignment is not a power of two %u", align);
		rte_errno = EINVAL;
		return NULL;
	}

	ret = dev->ops.mem_alloc(dev, size, align, &ptr);

	switch (ret) {
	case 0:
		return ptr;
	case -ENOMEM:
	case -E2BIG:
		rte_errno = -ret;
		return NULL;
	default:
		rte_errno = EPERM;
		return NULL;
	}
}

int
rte_gpu_mem_free(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_free == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
}

int
rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "register mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_register == NULL) {
		GPU_LOG(ERR, "mem registration not supported");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL || size == 0) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
}

int
rte_gpu_mem_unregister(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_unregister == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
}

void *
rte_gpu_mem_cpu_map(int16_t dev_id, size_t size, void *ptr)
{
	struct rte_gpu *dev;
	void *ptr_out;
	int ret;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "mem CPU map for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

	if (dev->ops.mem_cpu_map == NULL) {
		GPU_LOG(ERR, "mem CPU map not supported");
		rte_errno = ENOTSUP;
		return NULL;
	}

	if (ptr == NULL || size == 0) /* dry-run */
		return NULL;

	ret = GPU_DRV_RET(dev->ops.mem_cpu_map(dev, size, ptr, &ptr_out));

	switch (ret) {
	case 0:
		return ptr_out;
	case -ENOMEM:
	case -E2BIG:
		rte_errno = -ret;
		return NULL;
	default:
		rte_errno = EPERM;
		return NULL;
	}
}

int
rte_gpu_mem_cpu_unmap(int16_t dev_id, void *ptr)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "cpu_unmap mem for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.mem_cpu_unmap == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	if (ptr == NULL) /* dry-run */
		return 0;

	return GPU_DRV_RET(dev->ops.mem_cpu_unmap(dev, ptr));
}

int
rte_gpu_wmb(int16_t dev_id)
{
	struct rte_gpu *dev;

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "memory barrier for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (dev->ops.wmb == NULL) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	return GPU_DRV_RET(dev->ops.wmb(dev));
}

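/*
 * Illustrative CPU-side use of the communication flag (a sketch, not part of
 * the library; error checks omitted). The device-side code is expected to
 * poll the flag through the address registered with the device:
 *
 *	struct rte_gpu_comm_flag quit_flag;
 *
 *	rte_gpu_comm_create_flag(dev_id, &quit_flag, RTE_GPU_COMM_FLAG_CPU);
 *	rte_gpu_comm_set_flag(&quit_flag, 1);
 *	rte_gpu_comm_destroy_flag(&quit_flag);
 */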
int
rte_gpu_comm_create_flag(uint16_t dev_id, struct rte_gpu_comm_flag *devflag,
		enum rte_gpu_comm_flag_type mtype)
{
	size_t flag_size;
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

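	/*
	 * The flag is a 32-bit word in CPU memory, registered with the device
	 * so that both the CPU and the device can access it.
	 */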
	flag_size = sizeof(uint32_t);

	devflag->ptr = rte_zmalloc(NULL, flag_size, 0);
	if (devflag->ptr == NULL) {
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	ret = rte_gpu_mem_register(dev_id, flag_size, devflag->ptr);
	if (ret < 0) {
		rte_free(devflag->ptr);
		rte_errno = ENOMEM;
		return -rte_errno;
	}

	devflag->mtype = mtype;
	devflag->dev_id = dev_id;

	return 0;
}

int
rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag)
{
	int ret;

	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	ret = rte_gpu_mem_unregister(devflag->dev_id, devflag->ptr);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(devflag->ptr);

	return 0;
}

int
rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag, uint32_t val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	RTE_GPU_VOLATILE(*devflag->ptr) = val;

	return 0;
}

int
rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
{
	if (devflag == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	*val = RTE_GPU_VOLATILE(*devflag->ptr);

	return 0;
}

struct rte_gpu_comm_list *
rte_gpu_comm_create_list(uint16_t dev_id,
		uint32_t num_comm_items)
{
	struct rte_gpu_comm_list *comm_list;
	uint32_t idx_l;
	int ret;
	struct rte_gpu *dev;

	if (num_comm_items == 0) {
		rte_errno = EINVAL;
		return NULL;
	}

	dev = gpu_get_by_id(dev_id);
	if (dev == NULL) {
		GPU_LOG(ERR, "create communication list for invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return NULL;
	}

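	/*
	 * Both the list array and each per-item packet array are allocated in
	 * CPU memory and registered with the device.
	 */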
	comm_list = rte_zmalloc(NULL,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, 0);
	if (comm_list == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	ret = rte_gpu_mem_register(dev_id,
			sizeof(struct rte_gpu_comm_list) * num_comm_items, comm_list);
	if (ret < 0) {
		rte_errno = ENOMEM;
		return NULL;
	}

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].pkt_list == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}

		ret = rte_gpu_mem_register(dev_id,
				sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX,
				comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = ENOMEM;
			return NULL;
		}

		RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
		comm_list[idx_l].num_pkts = 0;
		comm_list[idx_l].dev_id = dev_id;

		comm_list[idx_l].mbufs = rte_zmalloc(NULL,
				sizeof(struct rte_mbuf *) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
		if (comm_list[idx_l].mbufs == NULL) {
			rte_errno = ENOMEM;
			return NULL;
		}
	}

	return comm_list;
}

int
rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
		uint32_t num_comm_items)
{
	uint32_t idx_l;
	int ret;
	uint16_t dev_id;

	if (comm_list == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	dev_id = comm_list[0].dev_id;

	for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
		ret = rte_gpu_mem_unregister(dev_id, comm_list[idx_l].pkt_list);
		if (ret < 0) {
			rte_errno = EINVAL;
			return -1;
		}

		rte_free(comm_list[idx_l].pkt_list);
		rte_free(comm_list[idx_l].mbufs);
	}

	ret = rte_gpu_mem_unregister(dev_id, comm_list);
	if (ret < 0) {
		rte_errno = EINVAL;
		return -1;
	}

	rte_free(comm_list);

	return 0;
}

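/*
 * Illustrative CPU-side flow for one communication list item (a sketch, not
 * part of the library; error checks omitted, and port_id/queue_id as well as
 * the burst and list sizes are assumed context):
 *
 *	struct rte_gpu_comm_list *comm_list;
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb;
 *
 *	comm_list = rte_gpu_comm_create_list(dev_id, 1024);
 *	nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *	rte_gpu_comm_populate_list_pkts(&comm_list[0], pkts, nb);
 *	... the device consumes the item and updates its status ...
 *	rte_gpu_comm_cleanup_list(&comm_list[0]);
 *	rte_gpu_comm_destroy_list(comm_list, 1024);
 */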
int
rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
		struct rte_mbuf **mbufs, uint32_t num_mbufs)
{
	uint32_t idx;

	if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
			mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < num_mbufs; idx++) {
		/* support only unchained mbufs */
		if (unlikely((mbufs[idx]->nb_segs > 1) ||
				(mbufs[idx]->next != NULL) ||
				(mbufs[idx]->data_len != mbufs[idx]->pkt_len))) {
			rte_errno = ENOTSUP;
			return -rte_errno;
		}
		comm_list_item->pkt_list[idx].addr =
				rte_pktmbuf_mtod_offset(mbufs[idx], uintptr_t, 0);
		comm_list_item->pkt_list[idx].size = mbufs[idx]->pkt_len;
		comm_list_item->mbufs[idx] = mbufs[idx];
	}

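	/*
	 * Publish the entries: write the packet count, flush writes toward
	 * the device, then mark the item ready.
	 */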
	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
	rte_gpu_wmb(comm_list_item->dev_id);
	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;

	return 0;
}

int
rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
{
	uint32_t idx = 0;

	if (comm_list_item == NULL) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (RTE_GPU_VOLATILE(comm_list_item->status) ==
			RTE_GPU_COMM_LIST_READY) {
		GPU_LOG(ERR, "packet list is still in progress");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	for (idx = 0; idx < RTE_GPU_COMM_LIST_PKTS_MAX; idx++) {
		if (comm_list_item->pkt_list[idx].addr == 0)
			break;

		comm_list_item->pkt_list[idx].addr = 0;
		comm_list_item->pkt_list[idx].size = 0;
		comm_list_item->mbufs[idx] = NULL;
	}

	RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
	RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
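	/* Make the reset state visible before the item is reused. */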
	rte_mb();

	return 0;
}
983