xref: /dpdk/app/test-gpudev/main.c (revision 9b8cae4d991ee4fb0faf92d8994879d6776df181)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright (c) 2021 NVIDIA Corporation & Affiliates
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <stdint.h>
9 #include <inttypes.h>
10 #include <stdarg.h>
11 #include <errno.h>
12 #include <getopt.h>
13 
14 #include <rte_common.h>
15 #include <rte_malloc.h>
16 #include <rte_memory.h>
17 #include <rte_eal.h>
18 #include <rte_ether.h>
19 #include <rte_ethdev.h>
20 #include <rte_mempool.h>
21 #include <rte_mbuf.h>
22 
23 #include <rte_gpudev.h>
24 
/*
 * Identifiers of the application command-line options.
 * The values are used as the "val" field of the getopt_long() option table.
 */
enum app_args {
	ARG_HELP = 0,		/* --help */
	ARG_MEMPOOL = 1		/* not referenced by the option table in this file */
};
29 
/*
 * Print the command-line synopsis of the application on stdout.
 *
 * prog_name: name of the executable, printed as the first word of the line.
 */
static void
usage(const char *prog_name)
{
	fprintf(stdout, "%s [EAL options] --\n", prog_name);
}
36 
37 static void
args_parse(int argc,char ** argv)38 args_parse(int argc, char **argv)
39 {
40 	char **argvopt;
41 	int opt;
42 	int opt_idx;
43 
44 	static struct option lgopts[] = {
45 		{ "help", 0, 0, ARG_HELP},
46 		/* End of options */
47 		{ 0, 0, 0, 0 }
48 	};
49 
50 	argvopt = argv;
51 	while ((opt = getopt_long(argc, argvopt, "",
52 				lgopts, &opt_idx)) != EOF) {
53 		switch (opt) {
54 		case ARG_HELP:
55 			usage(argv[0]);
56 			break;
57 		default:
58 			usage(argv[0]);
59 			rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]);
60 			break;
61 		}
62 	}
63 }
64 
/*
 * Test: allocate and free GPU memory on the device gpu_id.
 *
 * Steps:
 *  - allocate a buffer without alignment constraint;
 *  - allocate a buffer aligned to 4kB and check its address;
 *  - check that freeing an address inside the first buffer (not its base
 *    address) is refused;
 *  - free both buffers.
 *
 * Returns 0 when every step behaves as expected, -1 otherwise.
 */
static int
alloc_gpu_memory(uint16_t gpu_id)
{
	void *ptr_1 = NULL;
	void *ptr_2 = NULL;
	void *ptr_bad;
	size_t buf_bytes = 1024;
	unsigned int align = 4096;
	int ret;

	printf("\n=======> TEST: Allocate GPU memory\n\n");

	/* Alloc memory on the GPU without any specific alignment */
	ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes, 0);
	if (ptr_1 == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_1, buf_bytes);

	/* Alloc memory on the GPU with 4kB alignment */
	ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes, align);
	if (ptr_2 == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_2, buf_bytes);

	if (((uintptr_t)ptr_2) % align) {
		fprintf(stderr, "Memory address 0x%p is not aligned to %u\n", ptr_2, align);
		goto error;
	}

	/* Negative test: this address was never returned by the allocator. */
	ptr_bad = (uint8_t *)ptr_1 + 0x700;
	ret = rte_gpu_mem_free(gpu_id, ptr_bad);
	if (ret < 0) {
		printf("GPU memory 0x%p NOT freed: GPU driver didn't find this memory address internally.\n",
				ptr_bad);
	} else {
		fprintf(stderr, "ERROR: rte_gpu_mem_free freed GPU memory 0x%p\n",
				ptr_bad);
		goto error;
	}

	ret = rte_gpu_mem_free(gpu_id, ptr_2);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_2);
	/* Forget the pointer so that the error path cannot free it twice. */
	ptr_2 = NULL;

	ret = rte_gpu_mem_free(gpu_id, ptr_1);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_1);
	ptr_1 = NULL;

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:

	/* Best-effort release of the buffers that are still allocated. */
	if (ptr_1 != NULL)
		rte_gpu_mem_free(gpu_id, ptr_1);
	if (ptr_2 != NULL)
		rte_gpu_mem_free(gpu_id, ptr_2);

	printf("\n=======> TEST: FAILED\n");
	return -1;
}
134 
/*
 * Test: register and unregister CPU memory with the device gpu_id.
 *
 * Steps:
 *  - allocate a zeroed CPU buffer with rte_zmalloc();
 *  - register it with the GPU;
 *  - check that unregistering an address inside the buffer (not its base
 *    address) is refused;
 *  - unregister the buffer and free it.
 *
 * Returns 0 when every step behaves as expected, -1 otherwise.
 */
static int
register_cpu_memory(uint16_t gpu_id)
{
	void *ptr = NULL;
	void *ptr_bad;
	size_t buf_bytes = 1024;
	int registered = 0;
	int ret;

	printf("\n=======> TEST: Register CPU memory\n\n");

	/* Alloc the CPU memory that will be registered with the GPU */
	ptr = rte_zmalloc(NULL, buf_bytes, 0);
	if (ptr == NULL) {
		fprintf(stderr, "Failed to allocate CPU memory.\n");
		goto error;
	}

	ret = rte_gpu_mem_register(gpu_id, buf_bytes, ptr);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_register CPU memory returned error %d\n", ret);
		goto error;
	}
	registered = 1;
	printf("CPU memory registered at 0x%p %zuB\n", ptr, buf_bytes);

	/* Negative test: this address was never registered. */
	ptr_bad = (uint8_t *)ptr + 0x700;
	ret = rte_gpu_mem_unregister(gpu_id, ptr_bad);
	if (ret < 0) {
		printf("CPU memory 0x%p NOT unregistered: GPU driver didn't find this memory address internally\n",
				ptr_bad);
	} else {
		fprintf(stderr, "ERROR: rte_gpu_mem_unregister unregistered GPU memory 0x%p\n",
				ptr_bad);
		goto error;
	}

	ret = rte_gpu_mem_unregister(gpu_id, ptr);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret);
		goto error;
	}
	registered = 0;
	printf("CPU memory 0x%p unregistered\n", ptr);

	rte_free(ptr);

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:

	/* Only undo the registration if it actually took place. */
	if (registered)
		rte_gpu_mem_unregister(gpu_id, ptr);
	rte_free(ptr);
	printf("\n=======> TEST: FAILED\n");
	return -1;
}
187 
/*
 * Test: map GPU memory of the device gpu_id so that the CPU can access it.
 *
 * Steps:
 *  - allocate a GPU buffer aligned to 4kB;
 *  - map it for CPU access and write/read its first three bytes from the CPU;
 *  - unmap it and free the GPU buffer.
 *
 * Returns 0 when every step succeeds, -1 otherwise.
 */
static int
gpu_mem_cpu_map(uint16_t gpu_id)
{
	void *ptr_gpu = NULL;
	void *ptr_cpu = NULL;
	uint8_t *bytes;
	size_t buf_bytes = 1024;
	unsigned int align = 4096;
	int mapped = 0;
	int ret;

	printf("\n=======> TEST: Map GPU memory for CPU visibility\n\n");

	/* Alloc memory on the GPU with 4kB alignment */
	ptr_gpu = rte_gpu_mem_alloc(gpu_id, buf_bytes, align);
	if (ptr_gpu == NULL) {
		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
		goto error;
	}
	printf("GPU memory allocated at 0x%p size is %zu bytes\n",
			ptr_gpu, buf_bytes);

	ptr_cpu = rte_gpu_mem_cpu_map(gpu_id, buf_bytes, ptr_gpu);
	if (ptr_cpu == NULL) {
		fprintf(stderr, "rte_gpu_mem_cpu_map returned error\n");
		goto error;
	}
	mapped = 1;
	printf("GPU memory CPU mapped at 0x%p\n", ptr_cpu);

	bytes = ptr_cpu;
	bytes[0] = 0x4;
	bytes[1] = 0x5;
	bytes[2] = 0x6;

	printf("GPU memory first 3 bytes set from CPU: %x %x %x\n",
			bytes[0], bytes[1], bytes[2]);

	/* The mapping is released through the GPU address, not the CPU one. */
	ret = rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_cpu_unmap returned error %d\n", ret);
		goto error;
	}
	mapped = 0;
	printf("GPU memory CPU unmapped, 0x%p not valid anymore\n", ptr_cpu);

	ret = rte_gpu_mem_free(gpu_id, ptr_gpu);
	if (ret < 0) {
		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
		goto error;
	}
	printf("GPU memory 0x%p freed\n", ptr_gpu);
	ptr_gpu = NULL;

	printf("\n=======> TEST: PASSED\n");
	return 0;

error:

	/* Undo only the steps that completed and were not undone yet. */
	if (mapped)
		rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu);
	if (ptr_gpu != NULL)
		rte_gpu_mem_free(gpu_id, ptr_gpu);

	printf("\n=======> TEST: FAILED\n");
	return -1;
}
249 
250 static int
create_update_comm_flag(uint16_t gpu_id)251 create_update_comm_flag(uint16_t gpu_id)
252 {
253 	struct rte_gpu_comm_flag devflag;
254 	int ret = 0;
255 	uint32_t set_val;
256 	uint32_t get_val;
257 
258 	printf("\n=======> TEST: Communication flag\n\n");
259 
260 	ret = rte_gpu_comm_create_flag(gpu_id, &devflag, RTE_GPU_COMM_FLAG_CPU);
261 	if (ret < 0) {
262 		fprintf(stderr, "rte_gpu_comm_create_flag returned error %d\n", ret);
263 		goto error;
264 	}
265 
266 	set_val = 25;
267 	ret = rte_gpu_comm_set_flag(&devflag, set_val);
268 	if (ret < 0) {
269 		fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret);
270 		goto error;
271 	}
272 
273 	ret = rte_gpu_comm_get_flag_value(&devflag, &get_val);
274 	if (ret < 0) {
275 		fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret);
276 		goto error;
277 	}
278 
279 	printf("Communication flag value at 0x%p was set to %d and current value is %d\n",
280 			devflag.ptr, set_val, get_val);
281 
282 	set_val = 38;
283 	ret = rte_gpu_comm_set_flag(&devflag, set_val);
284 	if (ret < 0) {
285 		fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret);
286 		goto error;
287 	}
288 
289 	ret = rte_gpu_comm_get_flag_value(&devflag, &get_val);
290 	if (ret < 0) {
291 		fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret);
292 		goto error;
293 	}
294 
295 	printf("Communication flag value at 0x%p was set to %d and current value is %d\n",
296 			devflag.ptr, set_val, get_val);
297 
298 	ret = rte_gpu_comm_destroy_flag(&devflag);
299 	if (ret < 0) {
300 		fprintf(stderr, "rte_gpu_comm_destroy_flags returned error %d\n", ret);
301 		goto error;
302 	}
303 
304 	printf("\n=======> TEST: PASSED\n");
305 	return 0;
306 
307 error:
308 
309 	rte_gpu_comm_destroy_flag(&devflag);
310 	printf("\n=======> TEST: FAILED\n");
311 	return -1;
312 }
313 
314 static int
simulate_gpu_task(struct rte_gpu_comm_list * comm_list_item,int num_pkts)315 simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
316 {
317 	int idx;
318 
319 	if (comm_list_item == NULL)
320 		return -1;
321 
322 	for (idx = 0; idx < num_pkts; idx++) {
323 		/**
324 		 * consume(comm_list_item->pkt_list[idx].addr);
325 		 */
326 	}
327 	/*
328 	 * A real GPU workload function can't directly call rte_gpu_comm_set_status
329 	 * because it's a CPU-only function.
330 	 * A real GPU workload should implement the content
331 	 * of rte_gpu_comm_set_status() in GPU specific code.
332 	 */
333 	rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_DONE);
334 
335 	return 0;
336 }
337 
338 static int
create_update_comm_list(uint16_t gpu_id)339 create_update_comm_list(uint16_t gpu_id)
340 {
341 	int ret = 0;
342 	int i = 0;
343 	struct rte_gpu_comm_list *comm_list = NULL;
344 	uint32_t num_comm_items = 1024;
345 	struct rte_mbuf *mbufs[10];
346 
347 	printf("\n=======> TEST: Communication list\n\n");
348 
349 	comm_list = rte_gpu_comm_create_list(gpu_id, num_comm_items);
350 	if (comm_list == NULL) {
351 		fprintf(stderr, "rte_gpu_comm_create_list returned error %d\n", ret);
352 		goto error;
353 	}
354 
355 	/**
356 	 * Simulate DPDK receive functions like rte_eth_rx_burst()
357 	 */
358 	for (i = 0; i < 10; i++) {
359 		mbufs[i] = rte_zmalloc(NULL, sizeof(struct rte_mbuf), 0);
360 		if (mbufs[i] == NULL) {
361 			fprintf(stderr, "Failed to allocate fake mbufs in CPU memory.\n");
362 			goto error;
363 		}
364 
365 		memset(mbufs[i], 0, sizeof(struct rte_mbuf));
366 	}
367 
368 	/**
369 	 * Populate just the first item of  the list
370 	 */
371 	ret = rte_gpu_comm_populate_list_pkts(&(comm_list[0]), mbufs, 10);
372 	if (ret < 0) {
373 		fprintf(stderr, "rte_gpu_comm_populate_list_pkts returned error %d\n", ret);
374 		goto error;
375 	}
376 
377 	ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
378 	if (ret == 0) {
379 		fprintf(stderr, "rte_gpu_comm_cleanup_list erroneously cleaned the list even if packets have not been consumed yet\n");
380 		goto error;
381 	}
382 	printf("Communication list not cleaned because packets have not been consumed yet.\n");
383 
384 	/**
385 	 * Simulate a GPU tasks going through the packet list to consume
386 	 * mbufs packets and release them
387 	 */
388 	printf("Consuming packets...\n");
389 	simulate_gpu_task(&(comm_list[0]), 10);
390 
391 	/**
392 	 * Packets have been consumed, now the communication item
393 	 * and the related mbufs can be all released
394 	 */
395 	ret = rte_gpu_comm_cleanup_list(&(comm_list[0]));
396 	if (ret < 0) {
397 		fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret);
398 		goto error;
399 	}
400 
401 	printf("Communication list cleaned because packets have been consumed now.\n");
402 
403 	ret = rte_gpu_comm_destroy_list(comm_list, num_comm_items);
404 	if (ret < 0) {
405 		fprintf(stderr, "rte_gpu_comm_destroy_list returned error %d\n", ret);
406 		goto error;
407 	}
408 
409 	for (i = 0; i < 10; i++)
410 		rte_free(mbufs[i]);
411 
412 	printf("\n=======> TEST: PASSED\n");
413 	return 0;
414 
415 error:
416 
417 	rte_gpu_comm_destroy_list(comm_list, num_comm_items);
418 	for (i = 0; i < 10; i++)
419 		rte_free(mbufs[i]);
420 	printf("\n=======> TEST: FAILED\n");
421 	return -1;
422 }
423 
424 int
main(int argc,char ** argv)425 main(int argc, char **argv)
426 {
427 	int ret;
428 	int nb_gpus = 0;
429 	int16_t gpu_id = 0;
430 	struct rte_gpu_info ginfo;
431 
432 	/* Init EAL. */
433 	ret = rte_eal_init(argc, argv);
434 	if (ret < 0)
435 		rte_exit(EXIT_FAILURE, "EAL init failed\n");
436 	argc -= ret;
437 	argv += ret;
438 	if (argc > 1)
439 		args_parse(argc, argv);
440 	argc -= ret;
441 	argv += ret;
442 
443 	nb_gpus = rte_gpu_count_avail();
444 	printf("\n\nDPDK found %d GPUs:\n", nb_gpus);
445 	RTE_GPU_FOREACH(gpu_id)
446 	{
447 		if (rte_gpu_info_get(gpu_id, &ginfo))
448 			rte_exit(EXIT_FAILURE, "rte_gpu_info_get error - bye\n");
449 
450 		printf("\tGPU ID %d\n\t\tparent ID %d GPU Bus ID %s NUMA node %d Tot memory %.02f MB, Tot processors %d\n",
451 				ginfo.dev_id,
452 				ginfo.parent,
453 				ginfo.name,
454 				ginfo.numa_node,
455 				(((float)ginfo.total_memory)/(float)1024)/(float)1024,
456 				ginfo.processor_count
457 			);
458 	}
459 	printf("\n\n");
460 
461 	if (nb_gpus == 0) {
462 		fprintf(stderr, "Need at least one GPU on the system to run the example\n");
463 		return EXIT_FAILURE;
464 	}
465 
466 	gpu_id = 0;
467 
468 	/**
469 	 * Memory tests
470 	 */
471 	alloc_gpu_memory(gpu_id);
472 	register_cpu_memory(gpu_id);
473 	gpu_mem_cpu_map(gpu_id);
474 
475 	/**
476 	 * Communication items test
477 	 */
478 	create_update_comm_flag(gpu_id);
479 	create_update_comm_list(gpu_id);
480 
481 	/* clean up the EAL */
482 	rte_eal_cleanup();
483 
484 	return EXIT_SUCCESS;
485 }
486