1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) 2021 NVIDIA Corporation & Affiliates 3 */ 4 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 #include <stdint.h> 9 #include <inttypes.h> 10 #include <stdarg.h> 11 #include <errno.h> 12 #include <getopt.h> 13 14 #include <rte_common.h> 15 #include <rte_malloc.h> 16 #include <rte_memory.h> 17 #include <rte_eal.h> 18 #include <rte_ether.h> 19 #include <rte_ethdev.h> 20 #include <rte_mempool.h> 21 #include <rte_mbuf.h> 22 23 #include <rte_gpudev.h> 24 25 enum app_args { 26 ARG_HELP, 27 ARG_MEMPOOL 28 }; 29 30 static void 31 usage(const char *prog_name) 32 { 33 printf("%s [EAL options] --\n", 34 prog_name); 35 } 36 37 static void 38 args_parse(int argc, char **argv) 39 { 40 char **argvopt; 41 int opt; 42 int opt_idx; 43 44 static struct option lgopts[] = { 45 { "help", 0, 0, ARG_HELP}, 46 /* End of options */ 47 { 0, 0, 0, 0 } 48 }; 49 50 argvopt = argv; 51 while ((opt = getopt_long(argc, argvopt, "", 52 lgopts, &opt_idx)) != EOF) { 53 switch (opt) { 54 case ARG_HELP: 55 usage(argv[0]); 56 break; 57 default: 58 usage(argv[0]); 59 rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]); 60 break; 61 } 62 } 63 } 64 65 static int 66 alloc_gpu_memory(uint16_t gpu_id) 67 { 68 void *ptr_1 = NULL; 69 void *ptr_2 = NULL; 70 size_t buf_bytes = 1024; 71 unsigned int align = 4096; 72 int ret; 73 74 printf("\n=======> TEST: Allocate GPU memory\n\n"); 75 76 /* Alloc memory on GPU 0 without any specific alignment */ 77 ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes, 0); 78 if (ptr_1 == NULL) { 79 fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n"); 80 goto error; 81 } 82 printf("GPU memory allocated at 0x%p size is %zd bytes\n", 83 ptr_1, buf_bytes); 84 85 /* Alloc memory on GPU 0 with 4kB alignment */ 86 ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes, align); 87 if (ptr_2 == NULL) { 88 fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n"); 89 goto error; 90 } 91 printf("GPU memory allocated at 0x%p size is %zd bytes\n", 92 ptr_2, buf_bytes); 93 94 if (((uintptr_t)ptr_2) % align) { 95 fprintf(stderr, "Memory address 0x%p is not aligned to %u\n", ptr_2, align); 96 goto error; 97 } 98 99 ret = rte_gpu_mem_free(gpu_id, (uint8_t *)(ptr_1)+0x700); 100 if (ret < 0) { 101 printf("GPU memory 0x%p NOT freed: GPU driver didn't find this memory address internally.\n", 102 (uint8_t *)(ptr_1)+0x700); 103 } else { 104 fprintf(stderr, "ERROR: rte_gpu_mem_free freed GPU memory 0x%p\n", 105 (uint8_t *)(ptr_1)+0x700); 106 goto error; 107 } 108 109 ret = rte_gpu_mem_free(gpu_id, ptr_2); 110 if (ret < 0) { 111 fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret); 112 goto error; 113 } 114 printf("GPU memory 0x%p freed\n", ptr_2); 115 116 ret = rte_gpu_mem_free(gpu_id, ptr_1); 117 if (ret < 0) { 118 fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret); 119 goto error; 120 } 121 printf("GPU memory 0x%p freed\n", ptr_1); 122 123 printf("\n=======> TEST: PASSED\n"); 124 return 0; 125 126 error: 127 128 rte_gpu_mem_free(gpu_id, ptr_1); 129 rte_gpu_mem_free(gpu_id, ptr_2); 130 131 printf("\n=======> TEST: FAILED\n"); 132 return -1; 133 } 134 135 static int 136 register_cpu_memory(uint16_t gpu_id) 137 { 138 void *ptr = NULL; 139 size_t buf_bytes = 1024; 140 int ret; 141 142 printf("\n=======> TEST: Register CPU memory\n\n"); 143 144 /* Alloc memory on CPU visible from GPU 0 */ 145 ptr = rte_zmalloc(NULL, buf_bytes, 0); 146 if (ptr == NULL) { 147 fprintf(stderr, "Failed to allocate CPU memory.\n"); 148 goto error; 149 } 150 151 ret = rte_gpu_mem_register(gpu_id, buf_bytes, ptr); 152 if (ret < 0) { 153 fprintf(stderr, "rte_gpu_mem_register CPU memory returned error %d\n", ret); 154 goto error; 155 } 156 printf("CPU memory registered at 0x%p %zdB\n", ptr, buf_bytes); 157 158 ret = rte_gpu_mem_unregister(gpu_id, (uint8_t *)(ptr)+0x700); 159 if (ret < 0) { 160 printf("CPU memory 0x%p NOT unregistered: GPU driver didn't find this memory address internally\n", 161 (uint8_t *)(ptr)+0x700); 162 } else { 163 fprintf(stderr, "ERROR: rte_gpu_mem_unregister unregistered GPU memory 0x%p\n", 164 (uint8_t *)(ptr)+0x700); 165 goto error; 166 } 167 168 ret = rte_gpu_mem_unregister(gpu_id, ptr); 169 if (ret < 0) { 170 fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret); 171 goto error; 172 } 173 printf("CPU memory 0x%p unregistered\n", ptr); 174 175 rte_free(ptr); 176 177 printf("\n=======> TEST: PASSED\n"); 178 return 0; 179 180 error: 181 182 rte_gpu_mem_unregister(gpu_id, ptr); 183 rte_free(ptr); 184 printf("\n=======> TEST: FAILED\n"); 185 return -1; 186 } 187 188 static int 189 gpu_mem_cpu_map(uint16_t gpu_id) 190 { 191 void *ptr_gpu = NULL; 192 void *ptr_cpu = NULL; 193 size_t buf_bytes = 1024; 194 unsigned int align = 4096; 195 int ret; 196 197 printf("\n=======> TEST: Map GPU memory for CPU visibility\n\n"); 198 199 /* Alloc memory on GPU 0 with 4kB alignment */ 200 ptr_gpu = rte_gpu_mem_alloc(gpu_id, buf_bytes, align); 201 if (ptr_gpu == NULL) { 202 fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n"); 203 goto error; 204 } 205 printf("GPU memory allocated at 0x%p size is %zd bytes\n", 206 ptr_gpu, buf_bytes); 207 208 ptr_cpu = rte_gpu_mem_cpu_map(gpu_id, buf_bytes, ptr_gpu); 209 if (ptr_cpu == NULL) { 210 fprintf(stderr, "rte_gpu_mem_cpu_map returned error\n"); 211 goto error; 212 } 213 printf("GPU memory CPU mapped at 0x%p\n", ptr_cpu); 214 215 ((uint8_t *)ptr_cpu)[0] = 0x4; 216 ((uint8_t *)ptr_cpu)[1] = 0x5; 217 ((uint8_t *)ptr_cpu)[2] = 0x6; 218 219 printf("GPU memory first 3 bytes set from CPU: %x %x %x\n", 220 ((uint8_t *)ptr_cpu)[0], 221 ((uint8_t *)ptr_cpu)[1], 222 ((uint8_t *)ptr_cpu)[2]); 223 224 ret = rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu); 225 if (ret < 0) { 226 fprintf(stderr, "rte_gpu_mem_cpu_unmap returned error %d\n", ret); 227 goto error; 228 } 229 printf("GPU memory CPU unmapped, 0x%p not valid anymore\n", ptr_cpu); 230 231 ret = rte_gpu_mem_free(gpu_id, ptr_gpu); 232 if (ret < 0) { 233 fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret); 234 goto error; 235 } 236 printf("GPU memory 0x%p freed\n", ptr_gpu); 237 238 printf("\n=======> TEST: PASSED\n"); 239 return 0; 240 241 error: 242 243 rte_gpu_mem_cpu_unmap(gpu_id, ptr_gpu); 244 rte_gpu_mem_free(gpu_id, ptr_gpu); 245 246 printf("\n=======> TEST: FAILED\n"); 247 return -1; 248 } 249 250 static int 251 create_update_comm_flag(uint16_t gpu_id) 252 { 253 struct rte_gpu_comm_flag devflag; 254 int ret = 0; 255 uint32_t set_val; 256 uint32_t get_val; 257 258 printf("\n=======> TEST: Communication flag\n\n"); 259 260 ret = rte_gpu_comm_create_flag(gpu_id, &devflag, RTE_GPU_COMM_FLAG_CPU); 261 if (ret < 0) { 262 fprintf(stderr, "rte_gpu_comm_create_flag returned error %d\n", ret); 263 goto error; 264 } 265 266 set_val = 25; 267 ret = rte_gpu_comm_set_flag(&devflag, set_val); 268 if (ret < 0) { 269 fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret); 270 goto error; 271 } 272 273 ret = rte_gpu_comm_get_flag_value(&devflag, &get_val); 274 if (ret < 0) { 275 fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret); 276 goto error; 277 } 278 279 printf("Communication flag value at 0x%p was set to %d and current value is %d\n", 280 devflag.ptr, set_val, get_val); 281 282 set_val = 38; 283 ret = rte_gpu_comm_set_flag(&devflag, set_val); 284 if (ret < 0) { 285 fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret); 286 goto error; 287 } 288 289 ret = rte_gpu_comm_get_flag_value(&devflag, &get_val); 290 if (ret < 0) { 291 fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret); 292 goto error; 293 } 294 295 printf("Communication flag value at 0x%p was set to %d and current value is %d\n", 296 devflag.ptr, set_val, get_val); 297 298 ret = rte_gpu_comm_destroy_flag(&devflag); 299 if (ret < 0) { 300 fprintf(stderr, "rte_gpu_comm_destroy_flags returned error %d\n", ret); 301 goto error; 302 } 303 304 printf("\n=======> TEST: PASSED\n"); 305 return 0; 306 307 error: 308 309 rte_gpu_comm_destroy_flag(&devflag); 310 printf("\n=======> TEST: FAILED\n"); 311 return -1; 312 } 313 314 static int 315 simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts) 316 { 317 int idx; 318 319 if (comm_list_item == NULL) 320 return -1; 321 322 for (idx = 0; idx < num_pkts; idx++) { 323 /** 324 * consume(comm_list_item->pkt_list[idx].addr); 325 */ 326 } 327 /* 328 * A real GPU workload function can't directly call rte_gpu_comm_set_status 329 * because it's a CPU-only function. 330 * A real GPU workload should implement the content 331 * of rte_gpu_comm_set_status() in GPU specific code. 332 */ 333 rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_DONE); 334 335 return 0; 336 } 337 338 static int 339 create_update_comm_list(uint16_t gpu_id) 340 { 341 int ret = 0; 342 int i = 0; 343 struct rte_gpu_comm_list *comm_list = NULL; 344 uint32_t num_comm_items = 1024; 345 struct rte_mbuf *mbufs[10]; 346 347 printf("\n=======> TEST: Communication list\n\n"); 348 349 comm_list = rte_gpu_comm_create_list(gpu_id, num_comm_items); 350 if (comm_list == NULL) { 351 fprintf(stderr, "rte_gpu_comm_create_list returned error %d\n", ret); 352 goto error; 353 } 354 355 /** 356 * Simulate DPDK receive functions like rte_eth_rx_burst() 357 */ 358 for (i = 0; i < 10; i++) { 359 mbufs[i] = rte_zmalloc(NULL, sizeof(struct rte_mbuf), 0); 360 if (mbufs[i] == NULL) { 361 fprintf(stderr, "Failed to allocate fake mbufs in CPU memory.\n"); 362 goto error; 363 } 364 365 memset(mbufs[i], 0, sizeof(struct rte_mbuf)); 366 } 367 368 /** 369 * Populate just the first item of the list 370 */ 371 ret = rte_gpu_comm_populate_list_pkts(&(comm_list[0]), mbufs, 10); 372 if (ret < 0) { 373 fprintf(stderr, "rte_gpu_comm_populate_list_pkts returned error %d\n", ret); 374 goto error; 375 } 376 377 ret = rte_gpu_comm_cleanup_list(&(comm_list[0])); 378 if (ret == 0) { 379 fprintf(stderr, "rte_gpu_comm_cleanup_list erroneously cleaned the list even if packets have not been consumed yet\n"); 380 goto error; 381 } 382 printf("Communication list not cleaned because packets have not been consumed yet.\n"); 383 384 /** 385 * Simulate a GPU tasks going through the packet list to consume 386 * mbufs packets and release them 387 */ 388 printf("Consuming packets...\n"); 389 simulate_gpu_task(&(comm_list[0]), 10); 390 391 /** 392 * Packets have been consumed, now the communication item 393 * and the related mbufs can be all released 394 */ 395 ret = rte_gpu_comm_cleanup_list(&(comm_list[0])); 396 if (ret < 0) { 397 fprintf(stderr, "rte_gpu_comm_cleanup_list returned error %d\n", ret); 398 goto error; 399 } 400 401 printf("Communication list cleaned because packets have been consumed now.\n"); 402 403 ret = rte_gpu_comm_destroy_list(comm_list, num_comm_items); 404 if (ret < 0) { 405 fprintf(stderr, "rte_gpu_comm_destroy_list returned error %d\n", ret); 406 goto error; 407 } 408 409 for (i = 0; i < 10; i++) 410 rte_free(mbufs[i]); 411 412 printf("\n=======> TEST: PASSED\n"); 413 return 0; 414 415 error: 416 417 rte_gpu_comm_destroy_list(comm_list, num_comm_items); 418 for (i = 0; i < 10; i++) 419 rte_free(mbufs[i]); 420 printf("\n=======> TEST: FAILED\n"); 421 return -1; 422 } 423 424 int 425 main(int argc, char **argv) 426 { 427 int ret; 428 int nb_gpus = 0; 429 int16_t gpu_id = 0; 430 struct rte_gpu_info ginfo; 431 432 /* Init EAL. */ 433 ret = rte_eal_init(argc, argv); 434 if (ret < 0) 435 rte_exit(EXIT_FAILURE, "EAL init failed\n"); 436 argc -= ret; 437 argv += ret; 438 if (argc > 1) 439 args_parse(argc, argv); 440 argc -= ret; 441 argv += ret; 442 443 nb_gpus = rte_gpu_count_avail(); 444 printf("\n\nDPDK found %d GPUs:\n", nb_gpus); 445 RTE_GPU_FOREACH(gpu_id) 446 { 447 if (rte_gpu_info_get(gpu_id, &ginfo)) 448 rte_exit(EXIT_FAILURE, "rte_gpu_info_get error - bye\n"); 449 450 printf("\tGPU ID %d\n\t\tparent ID %d GPU Bus ID %s NUMA node %d Tot memory %.02f MB, Tot processors %d\n", 451 ginfo.dev_id, 452 ginfo.parent, 453 ginfo.name, 454 ginfo.numa_node, 455 (((float)ginfo.total_memory)/(float)1024)/(float)1024, 456 ginfo.processor_count 457 ); 458 } 459 printf("\n\n"); 460 461 if (nb_gpus == 0) { 462 fprintf(stderr, "Need at least one GPU on the system to run the example\n"); 463 return EXIT_FAILURE; 464 } 465 466 gpu_id = 0; 467 468 /** 469 * Memory tests 470 */ 471 alloc_gpu_memory(gpu_id); 472 register_cpu_memory(gpu_id); 473 gpu_mem_cpu_map(gpu_id); 474 475 /** 476 * Communication items test 477 */ 478 create_update_comm_flag(gpu_id); 479 create_update_comm_list(gpu_id); 480 481 /* clean up the EAL */ 482 rte_eal_cleanup(); 483 484 return EXIT_SUCCESS; 485 } 486