/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Cavium, Inc
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	int i;
	uint64_t total = 0;
	struct test_perf *t = evt_test_priv(test);

	printf("Packet distribution across worker cores :\n");
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;
	for (i = 0; i < t->nb_workers; i++)
		printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
				CLGRN" %3.2f\n"CLNRM, i,
				t->worker[i].processed_pkts,
				(((double)t->worker[i].processed_pkts)/total)
				* 100);

	return t->result;
}

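/*
 * Synthetic producer loop: allocate a perf_elt from the mempool, stamp it
 * with the current timer-cycle count and enqueue it as a NEW stage-0 event,
 * cycling flow_id over nb_flows, until nb_pkts events have been injected or
 * the test is marked done.
 */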
static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

static int
perf_producer_wrapper(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	/* Launch the producer function only in case of synthetic producer. */
	if (t->opt->prod_type == EVT_PROD_TYPE_SYNT)
		return perf_producer(arg);
	return 0;
}

static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}

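/*
 * Launch workers and producers on their lcores, then poll from the master
 * lcore: roughly once per second print the current and average Mpps (and the
 * average forward latency when enabled), mark the test successful once the
 * expected packet count has been processed, and abort when a synthetic
 * producer run makes no scheduling progress for about five seconds.
 */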
int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer_wrapper,
				&t->prod[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining-remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->result = EVT_TEST_SUCCESS;
				if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
					t->done = true;
					rte_smp_wmb();
					break;
				}
			}
		}

		if (new_cycles - dead_lock_cycles > dead_lock_sample &&
				opt->prod_type == EVT_PROD_TYPE_SYNT) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("No schedules for seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}

/*
 * Create one Rx adapter per ethernet device, steer all of its Rx queues to
 * event queue (prod * stride), and attach a service core when the event
 * device lacks an internal Rx adapter port.
 */
static int
perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
		struct rte_event_port_conf prod_conf)
{
	int ret = 0;
	uint16_t prod;
	struct rte_event_eth_rx_adapter_queue_conf queue_conf;

	memset(&queue_conf, 0,
			sizeof(struct rte_event_eth_rx_adapter_queue_conf));
	queue_conf.ev.sched_type = opt->sched_type_list[0];
	for (prod = 0; prod < rte_eth_dev_count(); prod++) {
		uint32_t cap;

		ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id,
				prod, &cap);
		if (ret) {
			evt_err("failed to get event rx adapter[%d]"
					" capabilities",
					opt->dev_id);
			return ret;
		}
		queue_conf.ev.queue_id = prod * stride;
		ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id,
				&prod_conf);
		if (ret) {
			evt_err("failed to create rx adapter[%d]", prod);
			return ret;
		}
		ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1,
				&queue_conf);
		if (ret) {
			evt_err("failed to add rx queues to adapter[%d]", prod);
			return ret;
		}

		if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
			uint32_t service_id;

			rte_event_eth_rx_adapter_service_id_get(prod,
					&service_id);
			ret = evt_service_setup(service_id);
			if (ret) {
				evt_err("Failed to setup service core"
						" for Rx adapter\n");
				return ret;
			}
		}

		ret = rte_eth_dev_start(prod);
		if (ret) {
			evt_err("Ethernet dev [%d] failed to start."
					" Using synthetic producer", prod);
			return ret;
		}

		ret = rte_event_eth_rx_adapter_start(prod);
		if (ret) {
			evt_err("Rx adapter[%d] start failed", prod);
			return ret;
		}
		printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
				prod, prod);
	}

	return ret;
}

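/*
 * Set up one event port per worker lcore and link it to all queues. The
 * remaining ports are producer ports: synthetic producers get unlinked ports
 * configured here, while the ethdev producer hands its port configuration to
 * the Rx adapter setup above.
 */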
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint16_t port, prod;
	int ret = -1;
	struct rte_event_port_conf port_conf;

	memset(&port_conf, 0, sizeof(struct rte_event_port_conf));
	rte_event_port_default_conf_get(opt->dev_id, 0, &port_conf);

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = port_conf.enqueue_depth,
			.new_event_threshold = port_conf.new_event_threshold,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
				port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	struct rte_event_port_conf prod_conf = {
			.dequeue_depth = port_conf.dequeue_depth,
			.enqueue_depth = port_conf.enqueue_depth,
			.new_event_threshold = port_conf.new_event_threshold,
	};
	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];
			p->t = t;
		}

		ret = perf_event_rx_adapter_setup(opt, stride, prod_conf);
		if (ret)
			return ret;
	} else {
		prod = 0;
		for ( ; port < perf_nb_event_ports(opt); port++) {
			struct prod_data *p = &t->prod[port];

			p->dev_id = opt->dev_id;
			p->port_id = port;
			p->queue_id = prod * stride;
			p->t = t;

			ret = rte_event_port_setup(opt->dev_id, port,
					&prod_conf);
			if (ret) {
				evt_err("failed to setup port %d", port);
				return ret;
			}
			prod++;
		}
	}

	return ret;
}

int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* N producer + N worker + 1 master when producer cores are used
	 * Else N worker + 1 master when Rx adapter is used
	 */
	lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2;

	if (rte_lcore_count() < lcores) {
		evt_err("test need minimum %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlaps with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlaps producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more workers lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		/* Validate producer lcores */
		if (evt_lcores_has_overlap(opt->plcores,
					rte_get_master_lcore())) {
			evt_err("producer lcores overlaps with master lcore");
			return -1;
		}
		if (evt_has_disabled_lcore(opt->plcores)) {
			evt_err("one or more producer lcores are not enabled");
			return -1;
		}
		if (!evt_has_active_lcore(opt->plcores)) {
			evt_err("minimum one producer is required");
			return -1;
		}
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
	evt_dump_producer_type(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}

static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
		void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

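/*
 * Ethdev bring-up used by the Rx adapter producer: each port gets a single
 * Rx/Tx queue pair (descriptor counts below), RSS over IP and promiscuous
 * mode. Nothing to do for the synthetic producer.
 */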
#define NB_RX_DESC	128
#define NB_TX_DESC	512
int
perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
{
	int i;
	struct test_perf *t = evt_test_priv(test);
	struct rte_eth_conf port_conf = {
		.rxmode = {
			.mq_mode = ETH_MQ_RX_RSS,
			.max_rx_pkt_len = ETHER_MAX_LEN,
			.split_hdr_size = 0,
			.header_split = 0,
			.hw_ip_checksum = 0,
			.hw_vlan_filter = 0,
			.hw_vlan_strip = 0,
			.hw_vlan_extend = 0,
			.jumbo_frame = 0,
			.hw_strip_crc = 1,
		},
		.rx_adv_conf = {
			.rss_conf = {
				.rss_key = NULL,
				.rss_hf = ETH_RSS_IP,
			},
		},
	};

	if (opt->prod_type == EVT_PROD_TYPE_SYNT)
		return 0;

	if (!rte_eth_dev_count()) {
		evt_err("No ethernet ports found.");
		return -ENODEV;
	}

	for (i = 0; i < rte_eth_dev_count(); i++) {

		if (rte_eth_dev_configure(i, 1, 1, &port_conf) < 0) {
			evt_err("Failed to configure eth port [%d]", i);
			return -EINVAL;
		}

		if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
				rte_socket_id(), NULL, t->pool) < 0) {
			evt_err("Failed to setup eth port [%d] rx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
				rte_socket_id(), NULL) < 0) {
			evt_err("Failed to setup eth port [%d] tx_queue: %d.",
					i, 0);
			return -EINVAL;
		}

		rte_eth_promiscuous_enable(i);
	}

	return 0;
}

void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	int i;
	RTE_SET_USED(test);

	if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) {
		for (i = 0; i < rte_eth_dev_count(); i++) {
			rte_event_eth_rx_adapter_stop(i);
			rte_eth_dev_stop(i);
			rte_eth_dev_close(i);
		}
	}
}

int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
		t->pool = rte_mempool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				sizeof(struct perf_elt), /* element size */
				512, /* cache size */
				0, NULL, NULL,
				perf_elt_init, /* obj constructor */
				NULL, opt->socket_id, 0); /* flags */
	} else {
		t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */
				opt->pool_sz, /* number of elements */
				512, /* cache size */
				0, /* private data size */
				RTE_MBUF_DEFAULT_BUF_SIZE,
				opt->socket_id); /* socket id */
	}

	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

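/*
 * Allocate and zero the per-test private area on the requested socket, then
 * derive the run-time totals (outstanding packets, workers, flows, schedule
 * types) from the options.
 */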
int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}