/*
 *   BSD LICENSE
 *
 *   Copyright (C) Cavium, Inc 2017.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Cavium, Inc nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "test_perf_common.h"

int
perf_test_result(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	return t->result;
}

static inline int
perf_producer(void *arg)
{
	struct prod_data *p = arg;
	struct test_perf *t = p->t;
	struct evt_options *opt = t->opt;
	const uint8_t dev_id = p->dev_id;
	const uint8_t port = p->port_id;
	struct rte_mempool *pool = t->pool;
	const uint64_t nb_pkts = t->nb_pkts;
	const uint32_t nb_flows = t->nb_flows;
	uint32_t flow_counter = 0;
	uint64_t count = 0;
	struct perf_elt *m;
	struct rte_event ev;

	if (opt->verbose_level > 1)
		printf("%s(): lcore %d dev_id %d port=%d queue %d\n", __func__,
				rte_lcore_id(), dev_id, port, p->queue_id);

	ev.event = 0;
	ev.op = RTE_EVENT_OP_NEW;
	ev.queue_id = p->queue_id;
	ev.sched_type = t->opt->sched_type_list[0];
	ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
	ev.event_type = RTE_EVENT_TYPE_CPU;
	ev.sub_event_type = 0; /* stage 0 */

	while (count < nb_pkts && t->done == false) {
		if (rte_mempool_get(pool, (void **)&m) < 0)
			continue;

		ev.flow_id = flow_counter++ % nb_flows;
		ev.event_ptr = m;
		m->timestamp = rte_get_timer_cycles();
		while (rte_event_enqueue_burst(dev_id, port, &ev, 1) != 1) {
			if (t->done)
				break;
			rte_pause();
			/* Refresh the timestamp on each retry so producer
			 * back-pressure does not inflate the measured
			 * forward latency.
			 */
			m->timestamp = rte_get_timer_cycles();
		}
		count++;
	}

	return 0;
}

static inline uint64_t
processed_pkts(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].processed_pkts;

	return total;
}

static inline uint64_t
total_latency(struct test_perf *t)
{
	uint8_t i;
	uint64_t total = 0;

	rte_smp_rmb();
	for (i = 0; i < t->nb_workers; i++)
		total += t->worker[i].latency;

	return total;
}
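
/*
 * For reference: the per-worker counters summed above are maintained by
 * the worker loops of the individual perf tests, which live outside this
 * file. A minimal sketch of that accounting (hypothetical worker code,
 * shown only to document the contract with processed_pkts() and
 * total_latency()) pairs the producer timestamp with the dequeue time:
 *
 *	struct perf_elt *const m = ev.event_ptr;
 *
 *	w->latency += rte_get_timer_cycles() - m->timestamp;
 *	w->processed_pkts++;
 *	rte_smp_wmb();	// pairs with the rte_smp_rmb() in the readers above
 */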

int
perf_launch_lcores(struct evt_test *test, struct evt_options *opt,
		int (*worker)(void *))
{
	int ret, lcore_id;
	struct test_perf *t = evt_test_priv(test);

	int port_idx = 0;
	/* launch workers */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->wlcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(worker,
				&t->worker[port_idx], lcore_id);
		if (ret) {
			evt_err("failed to launch worker %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	/* launch producers; producer ports follow the worker ports, so
	 * port_idx keeps counting from where the worker loop stopped
	 */
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (!(opt->plcores[lcore_id]))
			continue;

		ret = rte_eal_remote_launch(perf_producer, &t->prod[port_idx],
				lcore_id);
		if (ret) {
			evt_err("failed to launch perf_producer %d", lcore_id);
			return ret;
		}
		port_idx++;
	}

	const uint64_t total_pkts = opt->nb_pkts *
			evt_nr_active_lcores(opt->plcores);

	uint64_t dead_lock_cycles = rte_get_timer_cycles();
	int64_t dead_lock_remaining = total_pkts;
	const uint64_t dead_lock_sample = rte_get_timer_hz() * 5;

	uint64_t perf_cycles = rte_get_timer_cycles();
	int64_t perf_remaining = total_pkts;
	const uint64_t perf_sample = rte_get_timer_hz();

	static float total_mpps;
	static uint64_t samples;

	const uint64_t freq_mhz = rte_get_timer_hz() / 1000000;
	int64_t remaining = t->outstand_pkts - processed_pkts(t);

	while (t->done == false) {
		const uint64_t new_cycles = rte_get_timer_cycles();

		/* print throughput (and latency) stats once per second */
		if ((new_cycles - perf_cycles) > perf_sample) {
			const uint64_t latency = total_latency(t);
			const uint64_t pkts = processed_pkts(t);

			remaining = t->outstand_pkts - pkts;
			float mpps = (float)(perf_remaining - remaining)/1000000;

			perf_remaining = remaining;
			perf_cycles = new_cycles;
			total_mpps += mpps;
			++samples;
			if (opt->fwd_latency && pkts > 0) {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps [avg fwd latency %.3f us] "CLNRM,
					mpps, total_mpps/samples,
					(float)(latency/pkts)/freq_mhz);
			} else {
				printf(CLGRN"\r%.3f mpps avg %.3f mpps"CLNRM,
					mpps, total_mpps/samples);
			}
			fflush(stdout);

			if (remaining <= 0) {
				t->done = true;
				t->result = EVT_TEST_SUCCESS;
				rte_smp_wmb();
				break;
			}
		}

		/* declare a deadlock if no progress was made for 5 seconds */
		if (new_cycles - dead_lock_cycles > dead_lock_sample) {
			remaining = t->outstand_pkts - processed_pkts(t);
			if (dead_lock_remaining == remaining) {
				rte_event_dev_dump(opt->dev_id, stdout);
				evt_err("no schedules for 5 seconds, deadlock");
				t->done = true;
				rte_smp_wmb();
				break;
			}
			dead_lock_remaining = remaining;
			dead_lock_cycles = new_cycles;
		}
	}
	printf("\n");
	return 0;
}
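
/*
 * Caller sketch for the port setup below (hypothetical, mirroring how the
 * concrete perf tests are expected to invoke it): producer i is attached
 * to queue i * stride, so a test that gives each producer its own
 * nb_stages-deep chain of queues would pass the stage count as the
 * stride:
 *
 *	ret = perf_event_dev_port_setup(test, opt, opt->nb_stages,
 *			nb_queues);
 */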
int
perf_event_dev_port_setup(struct evt_test *test, struct evt_options *opt,
				uint8_t stride, uint8_t nb_queues)
{
	struct test_perf *t = evt_test_priv(test);
	uint8_t port, prod;
	int ret = -1;

	/* port configuration */
	const struct rte_event_port_conf wkr_p_conf = {
			.dequeue_depth = opt->wkr_deq_dep,
			.enqueue_depth = 64,
			.new_event_threshold = 4096,
	};

	/* setup one port per worker, linking to all queues */
	for (port = 0; port < evt_nr_active_lcores(opt->wlcores);
			port++) {
		struct worker_data *w = &t->worker[port];

		w->dev_id = opt->dev_id;
		w->port_id = port;
		w->t = t;
		w->processed_pkts = 0;
		w->latency = 0;

		ret = rte_event_port_setup(opt->dev_id, port, &wkr_p_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}

		/* a NULL queue list links the port to all configured queues */
		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
		if (ret != nb_queues) {
			evt_err("failed to link all queues to port %d", port);
			return -EINVAL;
		}
	}

	/* port for producers, no links */
	const struct rte_event_port_conf prod_conf = {
			.dequeue_depth = 8,
			.enqueue_depth = 32,
			.new_event_threshold = 1200,
	};
	prod = 0;
	for ( ; port < perf_nb_event_ports(opt); port++) {
		struct prod_data *p = &t->prod[port];

		p->dev_id = opt->dev_id;
		p->port_id = port;
		p->queue_id = prod * stride;
		p->t = t;

		ret = rte_event_port_setup(opt->dev_id, port, &prod_conf);
		if (ret) {
			evt_err("failed to setup port %d", port);
			return ret;
		}
		prod++;
	}

	return ret;
}

int
perf_opt_check(struct evt_options *opt, uint64_t nb_queues)
{
	unsigned int lcores;

	/* minimum: 1 producer + 1 worker + 1 master */
	lcores = 3;

	if (rte_lcore_count() < lcores) {
		evt_err("test needs a minimum of %d lcores", lcores);
		return -1;
	}

	/* Validate worker lcores */
	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
		evt_err("worker lcores overlap with master lcore");
		return -1;
	}
	if (evt_lcores_has_overlap_multi(opt->wlcores, opt->plcores)) {
		evt_err("worker lcores overlap with producer lcores");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->wlcores)) {
		evt_err("one or more worker lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->wlcores)) {
		evt_err("minimum one worker is required");
		return -1;
	}

	/* Validate producer lcores */
	if (evt_lcores_has_overlap(opt->plcores, rte_get_master_lcore())) {
		evt_err("producer lcores overlap with master lcore");
		return -1;
	}
	if (evt_has_disabled_lcore(opt->plcores)) {
		evt_err("one or more producer lcores are not enabled");
		return -1;
	}
	if (!evt_has_active_lcore(opt->plcores)) {
		evt_err("minimum one producer is required");
		return -1;
	}

	if (evt_has_invalid_stage(opt))
		return -1;

	if (evt_has_invalid_sched_type(opt))
		return -1;

	if (nb_queues > EVT_MAX_QUEUES) {
		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
		return -1;
	}
	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
		return -1;
	}

	/* Fixups */
	if (opt->nb_stages == 1 && opt->fwd_latency) {
		evt_info("fwd_latency is valid only when nb_stages > 1, disabling");
		opt->fwd_latency = 0;
	}
	if (opt->fwd_latency && !opt->q_priority) {
		evt_info("enabled queue priority for latency measurement");
		opt->q_priority = 1;
	}
	/* nb_pkts == 0 means run (practically) forever; divide by the
	 * producer count so the total packet count cannot overflow int64_t
	 */
	if (opt->nb_pkts == 0)
		opt->nb_pkts = INT64_MAX / evt_nr_active_lcores(opt->plcores);

	return 0;
}

void
perf_opt_dump(struct evt_options *opt, uint8_t nb_queues)
{
	evt_dump("nb_prod_lcores", "%d", evt_nr_active_lcores(opt->plcores));
	evt_dump_producer_lcores(opt);
	evt_dump("nb_worker_lcores", "%d", evt_nr_active_lcores(opt->wlcores));
	evt_dump_worker_lcores(opt);
	evt_dump_nb_stages(opt);
	evt_dump("nb_evdev_ports", "%d", perf_nb_event_ports(opt));
	evt_dump("nb_evdev_queues", "%d", nb_queues);
	evt_dump_queue_priority(opt);
	evt_dump_sched_type_list(opt);
}

void
perf_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(test);

	rte_event_dev_stop(opt->dev_id);
	rte_event_dev_close(opt->dev_id);
}
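
/*
 * The mempool element carried through the pipeline is defined in
 * test_perf_common.h; the mempool sizing below assumes its layout is
 * essentially a cache-aligned producer timestamp, along the lines of:
 *
 *	struct perf_elt {
 *		uint64_t timestamp;
 *	} __rte_cache_aligned;
 */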
static inline void
perf_elt_init(struct rte_mempool *mp, void *arg __rte_unused,
		void *obj, unsigned i __rte_unused)
{
	memset(obj, 0, mp->elt_size);
}

int
perf_mempool_setup(struct evt_test *test, struct evt_options *opt)
{
	struct test_perf *t = evt_test_priv(test);

	t->pool = rte_mempool_create(test->name, /* mempool name */
			opt->pool_sz, /* number of elements */
			sizeof(struct perf_elt), /* element size */
			512, /* cache size */
			0, NULL, NULL,
			perf_elt_init, /* obj constructor */
			NULL, opt->socket_id, 0); /* flags */
	if (t->pool == NULL) {
		evt_err("failed to create mempool");
		return -ENOMEM;
	}

	return 0;
}

void
perf_mempool_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);
	struct test_perf *t = evt_test_priv(test);

	rte_mempool_free(t->pool);
}

int
perf_test_setup(struct evt_test *test, struct evt_options *opt)
{
	void *test_perf;

	test_perf = rte_zmalloc_socket(test->name, sizeof(struct test_perf),
				RTE_CACHE_LINE_SIZE, opt->socket_id);
	if (test_perf == NULL) {
		evt_err("failed to allocate test_perf memory");
		goto nomem;
	}
	test->test_priv = test_perf;

	struct test_perf *t = evt_test_priv(test);

	t->outstand_pkts = opt->nb_pkts * evt_nr_active_lcores(opt->plcores);
	t->nb_workers = evt_nr_active_lcores(opt->wlcores);
	t->done = false;
	t->nb_pkts = opt->nb_pkts;
	t->nb_flows = opt->nb_flows;
	t->result = EVT_TEST_FAILED;
	t->opt = opt;
	memcpy(t->sched_type_list, opt->sched_type_list,
			sizeof(opt->sched_type_list));
	return 0;
nomem:
	return -ENOMEM;
}

void
perf_test_destroy(struct evt_test *test, struct evt_options *opt)
{
	RTE_SET_USED(opt);

	rte_free(test->test_priv);
}