/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        return t->result;
}

static inline int
perf_producer(void *arg)
{
        struct prod_data *p = arg;
        struct test_perf *t = p->t;
        struct evt_options *opt = t->opt;
        const uint8_t dev_id = p->dev_id;
        const uint8_t port = p->port_id;
        struct rte_mempool *pool = t->pool;
        const uint64_t nb_pkts = t->nb_pkts;
        const uint32_t nb_flows = t->nb_flows;
        uint32_t flow_counter = 0;
        uint64_t count = 0;
        struct perf_elt *m;
        struct rte_event ev;

        if (opt->verbose_level > 1)
                printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
                                rte_lcore_id(), dev_id, port, p->queue_id);

        ev.event = 0;
        ev.op = RTE_EVENT_OP_NEW;
        ev.queue_id = p->queue_id;
        ev.sched_type = t->opt->sched_type_list[0];
        ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
        ev.event_type = RTE_EVENT_TYPE_CPU;
        ev.sub_event_type = 0; /* stage 0 */

        while (count < nb_pkts && t->done == false) {
                if (rte_mempool_get(pool, (void **)&m) < 0)
                        continue;

                ev.flow_id = flow_counter++ % nb_flows;
                ev.event_ptr = m;
                m->timestamp = rte_get_timer_cycles();
                while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
                        if (t->done)
                                break;
                        rte_pause();
                        m->timestamp = rte_get_timer_cycles();
                }
                count++;
        }

        return 0;
}

static inline int
scheduler(void *arg)
{
        struct test_perf *t = arg;
        const uint8_t dev_id = t->opt->dev_id;

        while (t->done == false)
                rte_event_schedule(dev_id);

        return 0;
}

static inline uint64_t
processed_pkts(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;

        return total;
}

static inline uint64_t
total_latency(struct test_perf *t)
{
        uint8_t i;
        uint64_t total = 0;

        rte_smp_rmb();
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].latency;

        return total;
}


int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
                int (*worker)(void *))
{
        int ret, lcore_id;
        struct test_perf *t = evt_test_priv(test);

        int port_idx = 0;
        /* launch workers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->wlcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(worker,
                                &t->worker[port_idx], lcore_id);
                if (ret) {
                        evt_err("failed to launch worker %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        /* launch producers */
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (!(opt->plcores[lcore_id]))
                        continue;

                ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
                                lcore_id);
                if (ret) {
                        evt_err("failed to launch perf_producer %d", lcore_id);
                        return ret;
                }
                port_idx++;
        }

        /* launch scheduler */
        if (!evt_has_distributed_sched(opt->dev_id)) {
                ret = rte_eal_remote_launch(scheduler, t, opt->slcore);
                if (ret) {
                        evt_err("failed to launch sched %d", opt->slcore);
                        return ret;
                }
        }

        const uint64_t total_pkts = opt->nb_pkts *
                        evt_nr_active_lcores(opt->plcores);

        uint64_t dead_lock_cycles = rte_get_timer_cycles();
        int64_t dead_lock_remaining = total_pkts;
        const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

        uint64_t perf_cycles = rte_get_timer_cycles();
        int64_t perf_remaining = total_pkts;
        const uint64_t perf_sample = rte_get_timer_hz();

        static float total_mpps;
        static uint64_t samples;

        const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
        int64_t remaining = t->outstand_pkts - processed_pkts(t);

        while (t->done == false) {
                const uint64_t new_cycles = rte_get_timer_cycles();

                if ((new_cycles - perf_cycles) > perf_sample) {
                        const uint64_t latency = total_latency(t);
                        const uint64_t pkts = processed_pkts(t);

                        remaining = t->outstand_pkts - pkts;
                        float mpps = (float)(perf_remaining-remaining)/1000000;

                        perf_remaining = remaining;
                        perf_cycles = new_cycles;
                        total_mpps += mpps;
                        ++samples;
                        if (opt->fwd_latency && pkts > 0) {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
                                        mpps, total_mpps/samples,
                                        (float)(latency/pkts)/freq_mhz);
                        } else {
                                printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
                                        mpps, total_mpps/samples);
                        }
                        fflush(stdout);

                        if (remaining <= 0) {
                                t->done = true;
                                t->result = EVT_TEST_SUCCESS;
                                rte_smp_wmb();
                                break;
                        }
                }

                if (new_cycles - dead_lock_cycles > dead_lock_sample) {
                        remaining = t->outstand_pkts - processed_pkts(t);
                        if (dead_lock_remaining == remaining) {
                                rte_event_dev_dump(opt->dev_id, stdout);
                                evt_err("No schedules for seconds, deadlock");
                                t->done = true;
                                rte_smp_wmb();
                                break;
                        }
                        dead_lock_remaining = remaining;
                        dead_lock_cycles = new_cycles;
                }
        }
        printf("\n");
        return 0;
}

int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
                                uint8_t stride, uint8_t nb_queues)
{
        struct test_perf *t = evt_test_priv(test);
        uint8_t port, prod;
        int ret = -1;

        /* port configuration */
        const struct rte_event_port_conf wkr_p_conf = {
                        .dequeue_depth = opt->wkr_deq_dep,
                        .enqueue_depth = 64,
                        .new_event_threshold = 4096,
        };

        /* setup one port per worker, linking to all queues */
        for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
                                port++) {
                struct worker_data *w = &t->worker[port];

                w->dev_id = opt->dev_id;
                w->port_id = port;
                w->t = t;
                w->processed_pkts = 0;
                w->latency = 0;

                ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }

                ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
                if (ret != nb_queues) {
                        evt_err("failed to link all queues to port %d", port);
                        return -EINVAL;
                }
        }

        /* port for producers, no links */
        const struct rte_event_port_conf prod_conf = {
                        .dequeue_depth = 8,
                        .enqueue_depth = 32,
                        .new_event_threshold = 1200,
        };
        prod = 0;
        for ( ; port < perf_nb_event_ports(opt); port++) {
                struct prod_data *p = &t->prod[port];

                p->dev_id = opt->dev_id;
                p->port_id = port;
                p->queue_id = prod * stride;
                p->t = t;

                ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
                if (ret) {
                        evt_err("failed to setup port %d", port);
                        return ret;
                }
                prod++;
        }

        return ret;
}

int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
        unsigned int lcores;
        bool need_slcore = !evt_has_distributed_sched(opt->dev_id);

        /* N producer + N worker + 1 scheduler(based on dev capa) + 1 master */
        lcores = need_slcore ? 4 : 3;

        if (rte_lcore_count() < lcores) {
                evt_err("test need minimum %d lcores", lcores);
                return -1;
        }

        /* Validate worker lcores */
        if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
                evt_err("worker lcores overlaps with master lcore");
                return -1;
        }
        if (need_slcore && evt_lcores_has_overlap(opt->wlcores, opt->slcore)) {
                evt_err("worker lcores overlaps with scheduler lcore");
                return -1;
        }
        if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
                evt_err("worker lcores overlaps producer lcores");
                return -1;
        }
        if (evt_has_disabled_lcore(opt->wlcores)) {
                evt_err("one or more workers lcores are not enabled");
                return -1;
        }
        if (!evt_has_active_lcore(opt->wlcores)) {
                evt_err("minimum one worker is required");
                return -1;
        }

        /* Validate producer lcores */
        if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
                evt_err("producer lcores overlaps with master lcore");
                return -1;
        }
        if (need_slcore && evt_lcores_has_overlap(opt->plcores, opt->slcore)) {
                evt_err("producer lcores overlaps with scheduler lcore");
                return -1;
        }
        if (evt_has_disabled_lcore(opt->plcores)) {
                evt_err("one or more producer lcores are not enabled");
                return -1;
        }
        if (!evt_has_active_lcore(opt->plcores)) {
                evt_err("minimum one producer is required");
                return -1;
        }

        /* Validate scheduler lcore */
        if (!evt_has_distributed_sched(opt->dev_id) &&
                        opt->slcore == (int)rte_get_master_lcore()) {
                evt_err("scheduler lcore and master lcore should be different");
                return -1;
        }
        if (need_slcore && !rte_lcore_is_enabled(opt->slcore)) {
                evt_err("scheduler lcore is not enabled");
                return -1;
        }

        if (evt_has_invalid_stage(opt))
                return -1;

        if (evt_has_invalid_sched_type(opt))
                return -1;

        if (nb_queues > EVT_MAX_QUEUES) {
                evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
                return -1;
        }
        if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
                evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
                return -1;
        }

        /* Fixups */
        if (opt->nb_stages == 1 && opt->fwd_latency) {
                evt_info("fwd_latency is valid when nb_stages > 1, disabling");
                opt->fwd_latency = 0;
        }
        if (opt->fwd_latency && !opt->q_priority) {
                evt_info("enabled queue priority for latency measurement");
                opt->q_priority = 1;
        }
        if (opt->nb_pkts == 0)
                opt->nb_pkts = INT64_MAX/evt_nr_active_lcores(opt->plcores);

        return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
        evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
        evt_dump_producer_lcores(opt);
        evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
        evt_dump_worker_lcores(opt);
        if (!evt_has_distributed_sched(opt->dev_id))
                evt_dump_scheduler_lcore(opt);
        evt_dump_nb_stages(opt);
        evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
        evt_dump("nb_evdev_queues", "%d", nb_queues);
        evt_dump_queue_priority(opt);
        evt_dump_sched_type_list(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(test);

        rte_event_dev_stop(opt->dev_id);
        rte_event_dev_close(opt->dev_id);
}

static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
                void *obj, unsigned i __rte_unused)
{
        memset(obj, 0, mp->elt_size);
}

int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
        struct test_perf *t = evt_test_priv(test);

        t->pool = rte_mempool_create(test->name, /* mempool name */
                        opt->pool_sz, /* number of elements*/
                        sizeof(struct perf_elt), /* element size*/
                        512, /* cache size*/
                        0, NULL, NULL,
                        perf_elt_init, /* obj constructor */
                        NULL, opt->socket_id, 0); /* flags */
        if (t->pool == NULL) {
                evt_err("failed to create mempool");
                return -ENOMEM;
        }

        return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);
        struct test_perf *t = evt_test_priv(test);

        rte_mempool_free(t->pool);
}

int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
        void *test_perf;

        test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
                                RTE_CACHE_LINE_SIZE, opt->socket_id);
        if (test_perf == NULL) {
                evt_err("failed to allocate test_perf memory");
                goto nomem;
        }
        test->test_priv = test_perf;

        struct test_perf *t = evt_test_priv(test);

        t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
        t->nb_workers = evt_nr_active_lcores(opt->wlcores);
        t->done = false;
        t->nb_pkts = opt->nb_pkts;
        t->nb_flows = opt->nb_flows;
        t->result = EVT_TEST_FAILED;
        t->opt = opt;
        memcpy(t->sched_type_list, opt->sched_type_list,
                        sizeof(opt->sched_type_list));
        return 0;
nomem:
        return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
        RTE_SET_USED(opt);

        rte_free(test->test_priv);
}