1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2016-2017 Intel Corporation 3 */ 4 5 #include <getopt.h> 6 #include <stdint.h> 7 #include <stdio.h> 8 #include <signal.h> 9 #include <sched.h> 10 11 #include "pipeline_common.h" 12 13 struct fastpath_data *fdata; 14 15 struct config_data cdata = { 16 .num_packets = (1L << 25), /* do ~32M packets */ 17 .num_fids = 512, 18 .queue_type = RTE_SCHED_TYPE_ATOMIC, 19 .next_qid = {-1}, 20 .qid = {-1}, 21 .num_stages = 1, 22 .worker_cq_depth = 16 23 }; 24 25 static bool 26 core_in_use(unsigned int lcore_id) { 27 return (fdata->rx_core[lcore_id] || fdata->sched_core[lcore_id] || 28 fdata->tx_core[lcore_id] || fdata->worker_core[lcore_id]); 29 } 30 31 /* 32 * Parse the coremask given as argument (hexadecimal string) and fill 33 * the global configuration (core role and core count) with the parsed 34 * value. 35 */ 36 static int xdigit2val(unsigned char c) 37 { 38 int val; 39 40 if (isdigit(c)) 41 val = c - '0'; 42 else if (isupper(c)) 43 val = c - 'A' + 10; 44 else 45 val = c - 'a' + 10; 46 return val; 47 } 48 49 static uint64_t 50 parse_coremask(const char *coremask) 51 { 52 int i, j, idx = 0; 53 unsigned int count = 0; 54 char c; 55 int val; 56 uint64_t mask = 0; 57 const int32_t BITS_HEX = 4; 58 59 if (coremask == NULL) 60 return -1; 61 /* Remove all blank characters ahead and after . 62 * Remove 0x/0X if exists. 63 */ 64 while (isblank(*coremask)) 65 coremask++; 66 if (coremask[0] == '0' && ((coremask[1] == 'x') 67 || (coremask[1] == 'X'))) 68 coremask += 2; 69 i = strlen(coremask); 70 while ((i > 0) && isblank(coremask[i - 1])) 71 i--; 72 if (i == 0) 73 return -1; 74 75 for (i = i - 1; i >= 0 && idx < MAX_NUM_CORE; i--) { 76 c = coremask[i]; 77 if (isxdigit(c) == 0) { 78 /* invalid characters */ 79 return -1; 80 } 81 val = xdigit2val(c); 82 for (j = 0; j < BITS_HEX && idx < MAX_NUM_CORE; j++, idx++) { 83 if ((1 << j) & val) { 84 mask |= (1ULL << idx); 85 count++; 86 } 87 } 88 } 89 for (; i >= 0; i--) 90 if (coremask[i] != '0') 91 return -1; 92 if (count == 0) 93 return -1; 94 return mask; 95 } 96 97 static struct option long_options[] = { 98 {"workers", required_argument, 0, 'w'}, 99 {"packets", required_argument, 0, 'n'}, 100 {"atomic-flows", required_argument, 0, 'f'}, 101 {"num_stages", required_argument, 0, 's'}, 102 {"rx-mask", required_argument, 0, 'r'}, 103 {"tx-mask", required_argument, 0, 't'}, 104 {"sched-mask", required_argument, 0, 'e'}, 105 {"cq-depth", required_argument, 0, 'c'}, 106 {"work-cycles", required_argument, 0, 'W'}, 107 {"mempool-size", required_argument, 0, 'm'}, 108 {"queue-priority", no_argument, 0, 'P'}, 109 {"parallel", no_argument, 0, 'p'}, 110 {"ordered", no_argument, 0, 'o'}, 111 {"quiet", no_argument, 0, 'q'}, 112 {"use-atq", no_argument, 0, 'a'}, 113 {"dump", no_argument, 0, 'D'}, 114 {0, 0, 0, 0} 115 }; 116 117 static void 118 usage(void) 119 { 120 const char *usage_str = 121 " Usage: eventdev_demo [options]\n" 122 " Options:\n" 123 " -n, --packets=N Send N packets (default ~32M), 0 implies no limit\n" 124 " -f, --atomic-flows=N Use N random flows from 1 to N (default 16)\n" 125 " -s, --num_stages=N Use N atomic stages (default 1)\n" 126 " -r, --rx-mask=core mask Run NIC rx on CPUs in core mask\n" 127 " -w, --worker-mask=core mask Run worker on CPUs in core mask\n" 128 " -t, --tx-mask=core mask Run NIC tx on CPUs in core mask\n" 129 " -e --sched-mask=core mask Run scheduler on CPUs in core mask\n" 130 " -c --cq-depth=N Worker CQ depth (default 16)\n" 131 " -W --work-cycles=N Worker cycles (default 0)\n" 132 " -P --queue-priority Enable scheduler queue prioritization\n" 133 " -o, --ordered Use ordered scheduling\n" 134 " -p, --parallel Use parallel scheduling\n" 135 " -q, --quiet Minimize printed output\n" 136 " -a, --use-atq Use all type queues\n" 137 " -m, --mempool-size=N Dictate the mempool size\n" 138 " -D, --dump Print detailed statistics before exit" 139 "\n"; 140 fprintf(stderr, "%s", usage_str); 141 exit(1); 142 } 143 144 static void 145 parse_app_args(int argc, char **argv) 146 { 147 /* Parse cli options*/ 148 int option_index; 149 int c; 150 opterr = 0; 151 uint64_t rx_lcore_mask = 0; 152 uint64_t tx_lcore_mask = 0; 153 uint64_t sched_lcore_mask = 0; 154 uint64_t worker_lcore_mask = 0; 155 int i; 156 157 for (;;) { 158 c = getopt_long(argc, argv, "r:t:e:c:w:n:f:s:m:paoPqDW:", 159 long_options, &option_index); 160 if (c == -1) 161 break; 162 163 int popcnt = 0; 164 switch (c) { 165 case 'n': 166 cdata.num_packets = (int64_t)atol(optarg); 167 if (cdata.num_packets == 0) 168 cdata.num_packets = INT64_MAX; 169 break; 170 case 'f': 171 cdata.num_fids = (unsigned int)atoi(optarg); 172 break; 173 case 's': 174 cdata.num_stages = (unsigned int)atoi(optarg); 175 break; 176 case 'c': 177 cdata.worker_cq_depth = (unsigned int)atoi(optarg); 178 break; 179 case 'W': 180 cdata.worker_cycles = (unsigned int)atoi(optarg); 181 break; 182 case 'P': 183 cdata.enable_queue_priorities = 1; 184 break; 185 case 'o': 186 cdata.queue_type = RTE_SCHED_TYPE_ORDERED; 187 break; 188 case 'p': 189 cdata.queue_type = RTE_SCHED_TYPE_PARALLEL; 190 break; 191 case 'a': 192 cdata.all_type_queues = 1; 193 break; 194 case 'q': 195 cdata.quiet = 1; 196 break; 197 case 'D': 198 cdata.dump_dev = 1; 199 break; 200 case 'w': 201 worker_lcore_mask = parse_coremask(optarg); 202 break; 203 case 'r': 204 rx_lcore_mask = parse_coremask(optarg); 205 popcnt = __builtin_popcountll(rx_lcore_mask); 206 fdata->rx_single = (popcnt == 1); 207 break; 208 case 't': 209 tx_lcore_mask = parse_coremask(optarg); 210 popcnt = __builtin_popcountll(tx_lcore_mask); 211 fdata->tx_single = (popcnt == 1); 212 break; 213 case 'e': 214 sched_lcore_mask = parse_coremask(optarg); 215 popcnt = __builtin_popcountll(sched_lcore_mask); 216 fdata->sched_single = (popcnt == 1); 217 break; 218 case 'm': 219 cdata.num_mbuf = (uint64_t)atol(optarg); 220 break; 221 default: 222 usage(); 223 } 224 } 225 226 cdata.worker_lcore_mask = worker_lcore_mask; 227 cdata.sched_lcore_mask = sched_lcore_mask; 228 cdata.rx_lcore_mask = rx_lcore_mask; 229 cdata.tx_lcore_mask = tx_lcore_mask; 230 231 if (cdata.num_stages == 0 || cdata.num_stages > MAX_NUM_STAGES) 232 usage(); 233 234 for (i = 0; i < MAX_NUM_CORE; i++) { 235 fdata->rx_core[i] = !!(rx_lcore_mask & (1ULL << i)); 236 fdata->tx_core[i] = !!(tx_lcore_mask & (1ULL << i)); 237 fdata->sched_core[i] = !!(sched_lcore_mask & (1ULL << i)); 238 fdata->worker_core[i] = !!(worker_lcore_mask & (1ULL << i)); 239 240 if (fdata->worker_core[i]) 241 cdata.num_workers++; 242 if (core_in_use(i)) 243 cdata.active_cores++; 244 } 245 } 246 247 static void 248 do_capability_setup(uint8_t eventdev_id) 249 { 250 int ret; 251 uint16_t i; 252 uint8_t generic_pipeline = 0; 253 uint8_t burst = 0; 254 255 RTE_ETH_FOREACH_DEV(i) { 256 uint32_t caps = 0; 257 258 ret = rte_event_eth_tx_adapter_caps_get(eventdev_id, i, &caps); 259 if (ret) 260 rte_exit(EXIT_FAILURE, 261 "Invalid capability for Tx adptr port %d\n", i); 262 generic_pipeline |= !(caps & 263 RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT); 264 } 265 266 struct rte_event_dev_info eventdev_info; 267 memset(&eventdev_info, 0, sizeof(struct rte_event_dev_info)); 268 269 rte_event_dev_info_get(eventdev_id, &eventdev_info); 270 burst = eventdev_info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE ? 1 : 271 0; 272 273 if (generic_pipeline) 274 set_worker_generic_setup_data(&fdata->cap, burst); 275 else 276 set_worker_tx_enq_setup_data(&fdata->cap, burst); 277 } 278 279 static void 280 signal_handler(int signum) 281 { 282 static uint8_t once; 283 uint16_t portid; 284 285 if (fdata->done) 286 rte_exit(1, "Exiting on signal %d\n", signum); 287 if ((signum == SIGINT || signum == SIGTERM) && !once) { 288 printf("\n\nSignal %d received, preparing to exit...\n", 289 signum); 290 if (cdata.dump_dev) 291 rte_event_dev_dump(0, stdout); 292 once = 1; 293 fdata->done = 1; 294 rte_smp_wmb(); 295 296 RTE_ETH_FOREACH_DEV(portid) { 297 rte_event_eth_rx_adapter_stop(portid); 298 rte_event_eth_tx_adapter_stop(portid); 299 rte_eth_dev_stop(portid); 300 } 301 302 rte_eal_mp_wait_lcore(); 303 304 } 305 if (signum == SIGTSTP) 306 rte_event_dev_dump(0, stdout); 307 } 308 309 static inline uint64_t 310 port_stat(int dev_id, int32_t p) 311 { 312 char statname[64]; 313 snprintf(statname, sizeof(statname), "port_%u_rx", p); 314 return rte_event_dev_xstats_by_name_get(dev_id, statname, NULL); 315 } 316 317 int 318 main(int argc, char **argv) 319 { 320 struct worker_data *worker_data; 321 uint16_t num_ports; 322 uint16_t portid; 323 int lcore_id; 324 int err; 325 326 signal(SIGINT, signal_handler); 327 signal(SIGTERM, signal_handler); 328 signal(SIGTSTP, signal_handler); 329 330 err = rte_eal_init(argc, argv); 331 if (err < 0) 332 rte_panic("Invalid EAL arguments\n"); 333 334 argc -= err; 335 argv += err; 336 337 fdata = rte_malloc(NULL, sizeof(struct fastpath_data), 0); 338 if (fdata == NULL) 339 rte_panic("Out of memory\n"); 340 341 /* Parse cli options*/ 342 parse_app_args(argc, argv); 343 344 num_ports = rte_eth_dev_count_avail(); 345 if (num_ports == 0) 346 rte_panic("No ethernet ports found\n"); 347 348 const unsigned int cores_needed = cdata.active_cores; 349 350 if (!cdata.quiet) { 351 printf(" Config:\n"); 352 printf("\tports: %u\n", num_ports); 353 printf("\tworkers: %u\n", cdata.num_workers); 354 printf("\tpackets: %"PRIi64"\n", cdata.num_packets); 355 printf("\tQueue-prio: %u\n", cdata.enable_queue_priorities); 356 if (cdata.queue_type == RTE_SCHED_TYPE_ORDERED) 357 printf("\tqid0 type: ordered\n"); 358 if (cdata.queue_type == RTE_SCHED_TYPE_ATOMIC) 359 printf("\tqid0 type: atomic\n"); 360 printf("\tCores available: %u\n", rte_lcore_count()); 361 printf("\tCores used: %u\n", cores_needed); 362 } 363 364 if (rte_lcore_count() < cores_needed) 365 rte_panic("Too few cores (%d < %d)\n", rte_lcore_count(), 366 cores_needed); 367 368 const unsigned int ndevs = rte_event_dev_count(); 369 if (ndevs == 0) 370 rte_panic("No dev_id devs found. Pasl in a --vdev eventdev.\n"); 371 if (ndevs > 1) 372 fprintf(stderr, "Warning: More than one eventdev, using idx 0"); 373 374 375 do_capability_setup(0); 376 fdata->cap.check_opt(); 377 378 worker_data = rte_calloc(0, cdata.num_workers, 379 sizeof(worker_data[0]), 0); 380 if (worker_data == NULL) 381 rte_panic("rte_calloc failed\n"); 382 383 int dev_id = fdata->cap.evdev_setup(worker_data); 384 if (dev_id < 0) 385 rte_exit(EXIT_FAILURE, "Error setting up eventdev\n"); 386 387 fdata->cap.adptr_setup(num_ports); 388 389 /* Start the Ethernet port. */ 390 RTE_ETH_FOREACH_DEV(portid) { 391 err = rte_eth_dev_start(portid); 392 if (err < 0) 393 rte_exit(EXIT_FAILURE, "Error starting ethdev %d\n", 394 portid); 395 } 396 397 int worker_idx = 0; 398 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 399 if (lcore_id >= MAX_NUM_CORE) 400 break; 401 402 if (!fdata->rx_core[lcore_id] && 403 !fdata->worker_core[lcore_id] && 404 !fdata->tx_core[lcore_id] && 405 !fdata->sched_core[lcore_id]) 406 continue; 407 408 if (fdata->rx_core[lcore_id]) 409 printf( 410 "[%s()] lcore %d executing NIC Rx\n", 411 __func__, lcore_id); 412 413 if (fdata->tx_core[lcore_id]) 414 printf( 415 "[%s()] lcore %d executing NIC Tx\n", 416 __func__, lcore_id); 417 418 if (fdata->sched_core[lcore_id]) 419 printf("[%s()] lcore %d executing scheduler\n", 420 __func__, lcore_id); 421 422 if (fdata->worker_core[lcore_id]) 423 printf( 424 "[%s()] lcore %d executing worker, using eventdev port %u\n", 425 __func__, lcore_id, 426 worker_data[worker_idx].port_id); 427 428 err = rte_eal_remote_launch(fdata->cap.worker, 429 &worker_data[worker_idx], lcore_id); 430 if (err) { 431 rte_panic("Failed to launch worker on core %d\n", 432 lcore_id); 433 continue; 434 } 435 if (fdata->worker_core[lcore_id]) 436 worker_idx++; 437 } 438 439 lcore_id = rte_lcore_id(); 440 441 if (core_in_use(lcore_id)) 442 fdata->cap.worker(&worker_data[worker_idx++]); 443 444 rte_eal_mp_wait_lcore(); 445 446 if (!cdata.quiet && (port_stat(dev_id, worker_data[0].port_id) != 447 (uint64_t)-ENOTSUP)) { 448 printf("\nPort Workload distribution:\n"); 449 uint32_t i; 450 uint64_t tot_pkts = 0; 451 uint64_t pkts_per_wkr[RTE_MAX_LCORE] = {0}; 452 for (i = 0; i < cdata.num_workers; i++) { 453 pkts_per_wkr[i] = 454 port_stat(dev_id, worker_data[i].port_id); 455 tot_pkts += pkts_per_wkr[i]; 456 } 457 for (i = 0; i < cdata.num_workers; i++) { 458 float pc = pkts_per_wkr[i] * 100 / 459 ((float)tot_pkts); 460 printf("worker %i :\t%.1f %% (%"PRIu64" pkts)\n", 461 i, pc, pkts_per_wkr[i]); 462 } 463 464 } 465 466 RTE_ETH_FOREACH_DEV(portid) { 467 rte_eth_dev_close(portid); 468 } 469 470 rte_event_dev_stop(0); 471 rte_event_dev_close(0); 472 473 rte_eal_cleanup(); 474 475 return 0; 476 } 477