1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Cavium, Inc 3 */ 4 5 #include "test_perf_common.h" 6 7 int 8 perf_test_result(struct evt_test *test, struct evt_options *opt) 9 { 10 RTE_SET_USED(opt); 11 struct test_perf *t = evt_test_priv(test); 12 13 return t->result; 14 } 15 16 static inline int 17 perf_producer(void *arg) 18 { 19 struct prod_data *p = arg; 20 struct test_perf *t = p->t; 21 struct evt_options *opt = t->opt; 22 const uint8_t dev_id = p->dev_id; 23 const uint8_t port = p->port_id; 24 struct rte_mempool *pool = t->pool; 25 const uint64_t nb_pkts = t->nb_pkts; 26 const uint32_t nb_flows = t->nb_flows; 27 uint32_t flow_counter = 0; 28 uint64_t count = 0; 29 struct perf_elt *m; 30 struct rte_event ev; 31 32 if (opt->verbose_level > 1) 33 printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__, 34 rte_lcore_id(), dev_id, port, p->queue_id); 35 36 ev.event = 0; 37 ev.op = RTE_EVENT_OP_NEW; 38 ev.queue_id = p->queue_id; 39 ev.sched_type = t->opt->sched_type_list[0]; 40 ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL; 41 ev.event_type = RTE_EVENT_TYPE_CPU; 42 ev.sub_event_type = 0; /* stage 0 */ 43 44 while (count < nb_pkts && t->done == false) { 45 if (rte_mempool_get(pool, (void **)&m) < 0) 46 continue; 47 48 ev.flow_id = flow_counter++ % nb_flows; 49 ev.event_ptr = m; 50 m->timestamp = rte_get_timer_cycles(); 51 while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) { 52 if (t->done) 53 break; 54 rte_pause(); 55 m->timestamp = rte_get_timer_cycles(); 56 } 57 count++; 58 } 59 60 return 0; 61 } 62 63 static int 64 perf_producer_wrapper(void *arg) 65 { 66 struct prod_data *p = arg; 67 struct test_perf *t = p->t; 68 /* Launch the producer function only in case of synthetic producer. */ 69 if (t->opt->prod_type == EVT_PROD_TYPE_SYNT) 70 return perf_producer(arg); 71 return 0; 72 } 73 74 static inline uint64_t 75 processed_pkts(struct test_perf *t) 76 { 77 uint8_t i; 78 uint64_t total = 0; 79 80 rte_smp_rmb(); 81 for (i = 0; i < t->nb_workers; i++) 82 total += t->worker[i].processed_pkts; 83 84 return total; 85 } 86 87 static inline uint64_t 88 total_latency(struct test_perf *t) 89 { 90 uint8_t i; 91 uint64_t total = 0; 92 93 rte_smp_rmb(); 94 for (i = 0; i < t->nb_workers; i++) 95 total += t->worker[i].latency; 96 97 return total; 98 } 99 100 101 int 102 perf_launch_lcores(struct evt_test *test, struct evt_options *opt, 103 int (*worker)(void *)) 104 { 105 int ret, lcore_id; 106 struct test_perf *t = evt_test_priv(test); 107 108 int port_idx = 0; 109 /* launch workers */ 110 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 111 if (!(opt->wlcores[lcore_id])) 112 continue; 113 114 ret = rte_eal_remote_launch(worker, 115 &t->worker[port_idx], lcore_id); 116 if (ret) { 117 evt_err("failed to launch worker %d", lcore_id); 118 return ret; 119 } 120 port_idx++; 121 } 122 123 /* launch producers */ 124 RTE_LCORE_FOREACH_SLAVE(lcore_id) { 125 if (!(opt->plcores[lcore_id])) 126 continue; 127 128 ret = rte_eal_remote_launch(perf_producer_wrapper, 129 &t->prod[port_idx], lcore_id); 130 if (ret) { 131 evt_err("failed to launch perf_producer %d", lcore_id); 132 return ret; 133 } 134 port_idx++; 135 } 136 137 const uint64_t total_pkts = opt->nb_pkts * 138 evt_nr_active_lcores(opt->plcores); 139 140 uint64_t dead_lock_cycles = rte_get_timer_cycles(); 141 int64_t dead_lock_remaining = total_pkts; 142 const uint64_t dead_lock_sample = rte_get_timer_hz() * 5; 143 144 uint64_t perf_cycles = rte_get_timer_cycles(); 145 int64_t perf_remaining = total_pkts; 146 const uint64_t perf_sample = rte_get_timer_hz(); 147 148 static float total_mpps; 149 static uint64_t samples; 150 151 const uint64_t freq_mhz = rte_get_timer_hz() / 1000000; 152 int64_t remaining = t->outstand_pkts - processed_pkts(t); 153 154 while (t->done == false) { 155 const uint64_t new_cycles = rte_get_timer_cycles(); 156 157 if ((new_cycles - perf_cycles) > perf_sample) { 158 const uint64_t latency = total_latency(t); 159 const uint64_t pkts = processed_pkts(t); 160 161 remaining = t->outstand_pkts - pkts; 162 float mpps = (float)(perf_remaining-remaining)/1000000; 163 164 perf_remaining = remaining; 165 perf_cycles = new_cycles; 166 total_mpps += mpps; 167 ++samples; 168 if (opt->fwd_latency && pkts > 0) { 169 printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM, 170 mpps, total_mpps/samples, 171 (float)(latency/pkts)/freq_mhz); 172 } else { 173 printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM, 174 mpps, total_mpps/samples); 175 } 176 fflush(stdout); 177 178 if (remaining <= 0) { 179 t->result = EVT_TEST_SUCCESS; 180 if (opt->prod_type == EVT_PROD_TYPE_SYNT) { 181 t->done = true; 182 rte_smp_wmb(); 183 break; 184 } 185 } 186 } 187 188 if (new_cycles - dead_lock_cycles > dead_lock_sample && 189 opt->prod_type == EVT_PROD_TYPE_SYNT) { 190 remaining = t->outstand_pkts - processed_pkts(t); 191 if (dead_lock_remaining == remaining) { 192 rte_event_dev_dump(opt->dev_id, stdout); 193 evt_err("No schedules for seconds, deadlock"); 194 t->done = true; 195 rte_smp_wmb(); 196 break; 197 } 198 dead_lock_remaining = remaining; 199 dead_lock_cycles = new_cycles; 200 } 201 } 202 printf("\n"); 203 return 0; 204 } 205 206 static int 207 perf_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride, 208 struct rte_event_port_conf prod_conf) 209 { 210 int ret = 0; 211 uint16_t prod; 212 struct rte_event_eth_rx_adapter_queue_conf queue_conf; 213 214 memset(&queue_conf, 0, 215 sizeof(struct rte_event_eth_rx_adapter_queue_conf)); 216 queue_conf.ev.sched_type = opt->sched_type_list[0]; 217 for (prod = 0; prod < rte_eth_dev_count(); prod++) { 218 uint32_t cap; 219 220 ret = rte_event_eth_rx_adapter_caps_get(opt->dev_id, 221 prod, &cap); 222 if (ret) { 223 evt_err("failed to get event rx adapter[%d]" 224 " capabilities", 225 opt->dev_id); 226 return ret; 227 } 228 queue_conf.ev.queue_id = prod * stride; 229 ret = rte_event_eth_rx_adapter_create(prod, opt->dev_id, 230 &prod_conf); 231 if (ret) { 232 evt_err("failed to create rx adapter[%d]", prod); 233 return ret; 234 } 235 ret = rte_event_eth_rx_adapter_queue_add(prod, prod, -1, 236 &queue_conf); 237 if (ret) { 238 evt_err("failed to add rx queues to adapter[%d]", prod); 239 return ret; 240 } 241 242 if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) { 243 uint32_t service_id; 244 245 rte_event_eth_rx_adapter_service_id_get(prod, 246 &service_id); 247 ret = evt_service_setup(service_id); 248 if (ret) { 249 evt_err("Failed to setup service core" 250 " for Rx adapter\n"); 251 return ret; 252 } 253 } 254 255 ret = rte_eth_dev_start(prod); 256 if (ret) { 257 evt_err("Ethernet dev [%d] failed to start." 258 " Using synthetic producer", prod); 259 return ret; 260 } 261 262 ret = rte_event_eth_rx_adapter_start(prod); 263 if (ret) { 264 evt_err("Rx adapter[%d] start failed", prod); 265 return ret; 266 } 267 printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__, 268 prod, prod); 269 } 270 271 return ret; 272 } 273 274 int 275 perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt, 276 uint8_t stride, uint8_t nb_queues) 277 { 278 struct test_perf *t = evt_test_priv(test); 279 uint16_t port, prod; 280 int ret = -1; 281 struct rte_event_port_conf port_conf; 282 283 memset(&port_conf, 0, sizeof(struct rte_event_port_conf)); 284 rte_event_port_default_conf_get(opt->dev_id, 0, &port_conf); 285 286 /* port configuration */ 287 const struct rte_event_port_conf wkr_p_conf = { 288 .dequeue_depth = opt->wkr_deq_dep, 289 .enqueue_depth = port_conf.enqueue_depth, 290 .new_event_threshold = port_conf.new_event_threshold, 291 }; 292 293 /* setup one port per worker, linking to all queues */ 294 for (port = 0; port < evt_nr_active_lcores(opt->wlcores); 295 port++) { 296 struct worker_data *w = &t->worker[port]; 297 298 w->dev_id = opt->dev_id; 299 w->port_id = port; 300 w->t = t; 301 w->processed_pkts = 0; 302 w->latency = 0; 303 304 ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf); 305 if (ret) { 306 evt_err("failed to setup port %d", port); 307 return ret; 308 } 309 310 ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0); 311 if (ret != nb_queues) { 312 evt_err("failed to link all queues to port %d", port); 313 return -EINVAL; 314 } 315 } 316 317 /* port for producers, no links */ 318 struct rte_event_port_conf prod_conf = { 319 .dequeue_depth = port_conf.dequeue_depth, 320 .enqueue_depth = port_conf.enqueue_depth, 321 .new_event_threshold = port_conf.new_event_threshold, 322 }; 323 if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) { 324 for ( ; port < perf_nb_event_ports(opt); port++) { 325 struct prod_data *p = &t->prod[port]; 326 p->t = t; 327 } 328 329 ret = perf_event_rx_adapter_setup(opt, stride, prod_conf); 330 if (ret) 331 return ret; 332 } else { 333 prod = 0; 334 for ( ; port < perf_nb_event_ports(opt); port++) { 335 struct prod_data *p = &t->prod[port]; 336 337 p->dev_id = opt->dev_id; 338 p->port_id = port; 339 p->queue_id = prod * stride; 340 p->t = t; 341 342 ret = rte_event_port_setup(opt->dev_id, port, 343 &prod_conf); 344 if (ret) { 345 evt_err("failed to setup port %d", port); 346 return ret; 347 } 348 prod++; 349 } 350 } 351 352 return ret; 353 } 354 355 int 356 perf_opt_check(struct evt_options *opt, uint64_t nb_queues) 357 { 358 unsigned int lcores; 359 360 /* N producer + N worker + 1 master when producer cores are used 361 * Else N worker + 1 master when Rx adapter is used 362 */ 363 lcores = opt->prod_type == EVT_PROD_TYPE_SYNT ? 3 : 2; 364 365 if (rte_lcore_count() < lcores) { 366 evt_err("test need minimum %d lcores", lcores); 367 return -1; 368 } 369 370 /* Validate worker lcores */ 371 if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) { 372 evt_err("worker lcores overlaps with master lcore"); 373 return -1; 374 } 375 if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) { 376 evt_err("worker lcores overlaps producer lcores"); 377 return -1; 378 } 379 if (evt_has_disabled_lcore(opt->wlcores)) { 380 evt_err("one or more workers lcores are not enabled"); 381 return -1; 382 } 383 if (!evt_has_active_lcore(opt->wlcores)) { 384 evt_err("minimum one worker is required"); 385 return -1; 386 } 387 388 if (opt->prod_type == EVT_PROD_TYPE_SYNT) { 389 /* Validate producer lcores */ 390 if (evt_lcores_has_overlap(opt->plcores, 391 rte_get_master_lcore())) { 392 evt_err("producer lcores overlaps with master lcore"); 393 return -1; 394 } 395 if (evt_has_disabled_lcore(opt->plcores)) { 396 evt_err("one or more producer lcores are not enabled"); 397 return -1; 398 } 399 if (!evt_has_active_lcore(opt->plcores)) { 400 evt_err("minimum one producer is required"); 401 return -1; 402 } 403 } 404 405 if (evt_has_invalid_stage(opt)) 406 return -1; 407 408 if (evt_has_invalid_sched_type(opt)) 409 return -1; 410 411 if (nb_queues > EVT_MAX_QUEUES) { 412 evt_err("number of queues exceeds %d", EVT_MAX_QUEUES); 413 return -1; 414 } 415 if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) { 416 evt_err("number of ports exceeds %d", EVT_MAX_PORTS); 417 return -1; 418 } 419 420 /* Fixups */ 421 if (opt->nb_stages == 1 && opt->fwd_latency) { 422 evt_info("fwd_latency is valid when nb_stages > 1, disabling"); 423 opt->fwd_latency = 0; 424 } 425 if (opt->fwd_latency && !opt->q_priority) { 426 evt_info("enabled queue priority for latency measurement"); 427 opt->q_priority = 1; 428 } 429 if (opt->nb_pkts == 0) 430 opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores); 431 432 return 0; 433 } 434 435 void 436 perf_opt_dump(struct evt_options *opt, uint8_t nb_queues) 437 { 438 evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores)); 439 evt_dump_producer_lcores(opt); 440 evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores)); 441 evt_dump_worker_lcores(opt); 442 evt_dump_nb_stages(opt); 443 evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt)); 444 evt_dump("nb_evdev_queues", "%d", nb_queues); 445 evt_dump_queue_priority(opt); 446 evt_dump_sched_type_list(opt); 447 evt_dump_producer_type(opt); 448 } 449 450 void 451 perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt) 452 { 453 RTE_SET_USED(test); 454 455 rte_event_dev_stop(opt->dev_id); 456 rte_event_dev_close(opt->dev_id); 457 } 458 459 static inline void 460 perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused, 461 void *obj, unsigned i __rte_unused) 462 { 463 memset(obj, 0, mp->elt_size); 464 } 465 466 #define NB_RX_DESC 128 467 #define NB_TX_DESC 512 468 int 469 perf_ethdev_setup(struct evt_test *test, struct evt_options *opt) 470 { 471 int i; 472 struct test_perf *t = evt_test_priv(test); 473 struct rte_eth_conf port_conf = { 474 .rxmode = { 475 .mq_mode = ETH_MQ_RX_RSS, 476 .max_rx_pkt_len = ETHER_MAX_LEN, 477 .split_hdr_size = 0, 478 .header_split = 0, 479 .hw_ip_checksum = 0, 480 .hw_vlan_filter = 0, 481 .hw_vlan_strip = 0, 482 .hw_vlan_extend = 0, 483 .jumbo_frame = 0, 484 .hw_strip_crc = 1, 485 }, 486 .rx_adv_conf = { 487 .rss_conf = { 488 .rss_key = NULL, 489 .rss_hf = ETH_RSS_IP, 490 }, 491 }, 492 }; 493 494 if (opt->prod_type == EVT_PROD_TYPE_SYNT) 495 return 0; 496 497 if (!rte_eth_dev_count()) { 498 evt_err("No ethernet ports found."); 499 return -ENODEV; 500 } 501 502 for (i = 0; i < rte_eth_dev_count(); i++) { 503 504 if (rte_eth_dev_configure(i, 1, 1, 505 &port_conf) 506 < 0) { 507 evt_err("Failed to configure eth port [%d]", i); 508 return -EINVAL; 509 } 510 511 if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC, 512 rte_socket_id(), NULL, t->pool) < 0) { 513 evt_err("Failed to setup eth port [%d] rx_queue: %d.", 514 i, 0); 515 return -EINVAL; 516 } 517 518 if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC, 519 rte_socket_id(), NULL) < 0) { 520 evt_err("Failed to setup eth port [%d] tx_queue: %d.", 521 i, 0); 522 return -EINVAL; 523 } 524 525 rte_eth_promiscuous_enable(i); 526 } 527 528 return 0; 529 } 530 531 void perf_ethdev_destroy(struct evt_test *test, struct evt_options *opt) 532 { 533 int i; 534 RTE_SET_USED(test); 535 536 if (opt->prod_type == EVT_PROD_TYPE_ETH_RX_ADPTR) { 537 for (i = 0; i < rte_eth_dev_count(); i++) { 538 rte_event_eth_rx_adapter_stop(i); 539 rte_eth_dev_stop(i); 540 rte_eth_dev_close(i); 541 } 542 } 543 } 544 545 int 546 perf_mempool_setup(struct evt_test *test, struct evt_options *opt) 547 { 548 struct test_perf *t = evt_test_priv(test); 549 550 if (opt->prod_type == EVT_PROD_TYPE_SYNT) { 551 t->pool = rte_mempool_create(test->name, /* mempool name */ 552 opt->pool_sz, /* number of elements*/ 553 sizeof(struct perf_elt), /* element size*/ 554 512, /* cache size*/ 555 0, NULL, NULL, 556 perf_elt_init, /* obj constructor */ 557 NULL, opt->socket_id, 0); /* flags */ 558 } else { 559 t->pool = rte_pktmbuf_pool_create(test->name, /* mempool name */ 560 opt->pool_sz, /* number of elements*/ 561 512, /* cache size*/ 562 0, 563 RTE_MBUF_DEFAULT_BUF_SIZE, 564 opt->socket_id); /* flags */ 565 566 } 567 568 if (t->pool == NULL) { 569 evt_err("failed to create mempool"); 570 return -ENOMEM; 571 } 572 573 return 0; 574 } 575 576 void 577 perf_mempool_destroy(struct evt_test *test, struct evt_options *opt) 578 { 579 RTE_SET_USED(opt); 580 struct test_perf *t = evt_test_priv(test); 581 582 rte_mempool_free(t->pool); 583 } 584 585 int 586 perf_test_setup(struct evt_test *test, struct evt_options *opt) 587 { 588 void *test_perf; 589 590 test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf), 591 RTE_CACHE_LINE_SIZE, opt->socket_id); 592 if (test_perf == NULL) { 593 evt_err("failed to allocate test_perf memory"); 594 goto nomem; 595 } 596 test->test_priv = test_perf; 597 598 struct test_perf *t = evt_test_priv(test); 599 600 t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores); 601 t->nb_workers = evt_nr_active_lcores(opt->wlcores); 602 t->done = false; 603 t->nb_pkts = opt->nb_pkts; 604 t->nb_flows = opt->nb_flows; 605 t->result = EVT_TEST_FAILED; 606 t->opt = opt; 607 memcpy(t->sched_type_list, opt->sched_type_list, 608 sizeof(opt->sched_type_list)); 609 return 0; 610 nomem: 611 return -ENOMEM; 612 } 613 614 void 615 perf_test_destroy(struct evt_test *test, struct evt_options *opt) 616 { 617 RTE_SET_USED(opt); 618 619 rte_free(test->test_priv); 620 } 621