/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <inttypes.h>

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_throughput.h"
#include "cperf_ops.h"

struct cperf_throughput_results {
	uint64_t ops_enqueued;
	uint64_t ops_dequeued;

	uint64_t ops_enqueued_failed;
	uint64_t ops_dequeued_failed;

	uint64_t ops_failed;

	double ops_per_second;
	double throughput_gbps;
	double cycles_per_byte;
};

struct cperf_throughput_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;
	cperf_verify_crypto_op_t verify_op_output;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
	struct cperf_throughput_results results;
};

struct cperf_op_result {
	enum rte_crypto_op_status status;
};

static void
cperf_throughput_test_free(struct cperf_throughput_ctx *ctx, uint32_t mbuf_nb)
{
	uint32_t i;

	if (ctx) {
		if (ctx->sess)
			rte_cryptodev_sym_session_free(ctx->dev_id, ctx->sess);

		if (ctx->mbufs_in) {
			for (i = 0; i < mbuf_nb; i++)
				rte_pktmbuf_free(ctx->mbufs_in[i]);

			rte_free(ctx->mbufs_in);
		}

		if (ctx->mbufs_out) {
			for (i = 0; i < mbuf_nb; i++) {
				if (ctx->mbufs_out[i] != NULL)
					rte_pktmbuf_free(ctx->mbufs_out[i]);
			}

			rte_free(ctx->mbufs_out);
		}

		if (ctx->pkt_mbuf_pool_in)
			rte_mempool_free(ctx->pkt_mbuf_pool_in);

		if (ctx->pkt_mbuf_pool_out)
			rte_mempool_free(ctx->pkt_mbuf_pool_out);

		if (ctx->crypto_op_pool)
			rte_mempool_free(ctx->crypto_op_pool);

		rte_free(ctx);
	}
}
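/*
 * cperf_mbuf_create() below builds a test buffer of options->buffer_sz bytes
 * spread across segments_nb chained segments, copies the reference plaintext
 * (or ciphertext, for decrypt tests) into it, and reserves tail space for the
 * digest. A worked example of the segment arithmetic, using hypothetical
 * values buffer_sz = 1024 and segments_nb = 3:
 *
 *	segment_sz = 1024 / 3 = 341	(bytes appended per segment)
 *	last_sz    = 1024 % 3 = 1	(remainder appended to the chain tail)
 *
 * so the chain carries 3 * 341 + 1 = 1024 payload bytes, followed by
 * auth_digest_sz bytes of digest space. For AEAD tests the AAD is prepended
 * to the head segment, padded to a 16-byte boundary.
 */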
static struct rte_mbuf *
cperf_mbuf_create(struct rte_mempool *mempool,
		uint32_t segments_nb,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector)
{
	struct rte_mbuf *mbuf;
	uint32_t segment_sz = options->buffer_sz / segments_nb;
	uint32_t last_sz = options->buffer_sz % segments_nb;
	uint8_t *mbuf_data;
	uint8_t *test_data =
			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
					test_vector->plaintext.data :
					test_vector->ciphertext.data;

	mbuf = rte_pktmbuf_alloc(mempool);
	if (mbuf == NULL)
		goto error;

	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
	if (mbuf_data == NULL)
		goto error;

	memcpy(mbuf_data, test_data, segment_sz);
	test_data += segment_sz;
	segments_nb--;

	while (segments_nb) {
		struct rte_mbuf *m;

		m = rte_pktmbuf_alloc(mempool);
		if (m == NULL)
			goto error;

		if (rte_pktmbuf_chain(mbuf, m)) {
			rte_pktmbuf_free(m);
			goto error;
		}

		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, segment_sz);
		test_data += segment_sz;
		segments_nb--;
	}

	if (last_sz) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, last_sz);
	}

	/* Reserve space at the end of the chain for the digest */
	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
			options->auth_digest_sz);
	if (mbuf_data == NULL)
		goto error;

	if (options->op_type == CPERF_AEAD) {
		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
				RTE_ALIGN_CEIL(options->auth_aad_sz, 16));

		if (aead == NULL)
			goto error;

		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
	}

	return mbuf;
error:
	if (mbuf != NULL)
		rte_pktmbuf_free(mbuf);

	return NULL;
}
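/*
 * The constructor below, together with cperf_throughput_test_runner() and
 * cperf_throughput_test_destructor(), forms the throughput test's entry
 * points. In DPDK's test-crypto-perf framework these are collected in a
 * table of function pointers keyed by test type; a minimal sketch, with the
 * struct and table names as found in cperf.h and main.c of contemporary
 * DPDK releases (treat them as illustrative, they vary by version):
 *
 *	const struct cperf_test cperf_testmap[] = {
 *		[CPERF_TEST_TYPE_THROUGHPUT] = {
 *			cperf_throughput_test_constructor,
 *			cperf_throughput_test_runner,
 *			cperf_throughput_test_destructor
 *		},
 *	};
 *
 * The framework calls the constructor once per device/queue pair, launches
 * the runner on a worker lcore, and invokes the destructor to report results
 * and release resources.
 */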
void *
cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_throughput_ctx *ctx = NULL;
	unsigned int mbuf_idx = 0;
	char pool_name[32] = "";

	ctx = rte_malloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	ctx->sess = op_fns->sess_create(dev_id, options, test_vector);
	if (ctx->sess == NULL)
		goto err;

	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
			dev_id);

	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
			options->pool_sz * options->segments_nb, 0, 0,
			RTE_PKTMBUF_HEADROOM +
			RTE_CACHE_LINE_ROUNDUP(
				(options->buffer_sz / options->segments_nb) +
				(options->buffer_sz % options->segments_nb) +
				options->auth_digest_sz),
			rte_socket_id());
	if (ctx->pkt_mbuf_pool_in == NULL)
		goto err;

	/* The runner cycles through the mbufs in bursts, so the pool size
	 * must be a multiple of the burst size.
	 */
	if (ctx->options->pool_sz % ctx->options->burst_sz)
		goto err;

	/* Generate mbufs_in with plaintext populated for test */
	ctx->mbufs_in = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) * ctx->options->pool_sz), 0);
	if (ctx->mbufs_in == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
				ctx->pkt_mbuf_pool_in, options->segments_nb,
				options, test_vector);
		if (ctx->mbufs_in[mbuf_idx] == NULL)
			goto err;
	}

	if (options->out_of_place == 1) {
		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
				dev_id);

		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
				pool_name, options->pool_sz, 0, 0,
				RTE_PKTMBUF_HEADROOM +
				RTE_CACHE_LINE_ROUNDUP(
					options->buffer_sz +
					options->auth_digest_sz),
				rte_socket_id());
		if (ctx->pkt_mbuf_pool_out == NULL)
			goto err;
	}

	ctx->mbufs_out = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) *
			ctx->options->pool_sz), 0);
	if (ctx->mbufs_out == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		if (options->out_of_place == 1) {
			/* Out-of-place destination buffers are always linear
			 * (a single segment).
			 */
			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
					ctx->pkt_mbuf_pool_out, 1,
					options, test_vector);
			if (ctx->mbufs_out[mbuf_idx] == NULL)
				goto err;
		} else {
			ctx->mbufs_out[mbuf_idx] = NULL;
		}
	}

	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
			dev_id);

	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz, 0, 0,
			rte_socket_id());
	if (ctx->crypto_op_pool == NULL)
		goto err;

	return ctx;
err:
	cperf_throughput_test_free(ctx, mbuf_idx);

	return NULL;
}
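/*
 * Sizing note for the input mempool created above: each mbuf's data room
 * must hold one segment plus the division remainder plus digest space,
 * rounded up to a cache line. Using hypothetical values buffer_sz = 1024,
 * segments_nb = 4 and auth_digest_sz = 16 on a 64-byte cache line:
 *
 *	1024 / 4 + 1024 % 4 + 16 = 256 + 0 + 16 = 272
 *	RTE_CACHE_LINE_ROUNDUP(272) = 320
 *
 * so each element provides RTE_PKTMBUF_HEADROOM + 320 bytes of data room,
 * and the pool holds pool_sz * segments_nb mbufs so that every chain can be
 * fully allocated.
 */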
static int
cperf_throughput_test_verifier(struct rte_mbuf *mbuf,
		const struct cperf_options *options,
		const struct cperf_test_vector *vector)
{
	const struct rte_mbuf *m;
	uint32_t len;
	uint16_t nb_segs;
	uint8_t *data;
	uint32_t cipher_offset, auth_offset;
	uint8_t cipher, auth;
	int res = 0;

	/* Compute the total payload length across all segments */
	m = mbuf;
	nb_segs = m->nb_segs;
	len = 0;
	while (m && nb_segs != 0) {
		len += m->data_len;
		m = m->next;
		nb_segs--;
	}

	data = rte_malloc(NULL, len, 0);
	if (data == NULL)
		return 1;

	/* Flatten the (possibly segmented) mbuf into a contiguous buffer */
	m = mbuf;
	nb_segs = m->nb_segs;
	len = 0;
	while (m && nb_segs != 0) {
		memcpy(data + len, rte_pktmbuf_mtod(m, uint8_t *),
				m->data_len);
		len += m->data_len;
		m = m->next;
		nb_segs--;
	}

	switch (options->op_type) {
	case CPERF_CIPHER_ONLY:
		cipher = 1;
		cipher_offset = 0;
		auth = 0;
		auth_offset = 0;
		break;
	case CPERF_CIPHER_THEN_AUTH:
		cipher = 1;
		cipher_offset = 0;
		auth = 1;
		auth_offset = vector->plaintext.length;
		break;
	case CPERF_AUTH_ONLY:
		cipher = 0;
		cipher_offset = 0;
		auth = 1;
		auth_offset = vector->plaintext.length;
		break;
	case CPERF_AUTH_THEN_CIPHER:
		cipher = 1;
		cipher_offset = 0;
		auth = 1;
		auth_offset = vector->plaintext.length;
		break;
	case CPERF_AEAD:
		cipher = 1;
		cipher_offset = vector->aad.length;
		auth = 1;
		auth_offset = vector->aad.length + vector->plaintext.length;
		break;
	default:
		rte_free(data);
		return 1;
	}

	if (cipher == 1) {
		if (options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
			res |= !!memcmp(data + cipher_offset,
					vector->ciphertext.data,
					vector->ciphertext.length);
		else
			res |= !!memcmp(data + cipher_offset,
					vector->plaintext.data,
					vector->plaintext.length);
	}

	if (auth == 1) {
		if (options->auth_op == RTE_CRYPTO_AUTH_OP_GENERATE)
			res |= !!memcmp(data + auth_offset,
					vector->digest.data,
					vector->digest.length);
	}

	rte_free(data);

	return res;
}
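/*
 * The verifier above flattens the segment chain by hand. On DPDK releases
 * that provide rte_pktmbuf_read() (added around 17.02), an equivalent flat
 * view could be obtained with a single call; a minimal sketch, assuming a
 * caller-supplied scratch buffer 'scratch' of at least 'len' bytes:
 *
 *	const uint8_t *flat = rte_pktmbuf_read(mbuf, 0, len, scratch);
 *
 * rte_pktmbuf_read() returns a pointer directly into the mbuf when the
 * requested region is contiguous and only copies into 'scratch' otherwise,
 * avoiding the unconditional copy done here.
 */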
int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_throughput_ctx *ctx = test_ctx;
	struct cperf_op_result *res = NULL, *pres;

	if (ctx->options->verify) {
		res = rte_malloc(NULL, sizeof(struct cperf_op_result) *
				ctx->options->total_ops, 0);
		if (res == NULL)
			return -1;
	}

	uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
	uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;

	uint64_t i, m_idx = 0, tsc_start, tsc_end, tsc_duration;

	uint16_t ops_unused = 0;
	uint64_t idx = 0;

	struct rte_crypto_op *ops[ctx->options->burst_sz];
	struct rte_crypto_op *ops_processed[ctx->options->burst_sz];

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	if (!ctx->options->csv)
		printf("\n# Running throughput test on device: %u, lcore: %u\n",
				ctx->dev_id, lcore);

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	tsc_start = rte_rdtsc_precise();

	while (ops_enqd_total < ctx->options->total_ops) {

		uint16_t burst_size = ((ops_enqd_total + ctx->options->burst_sz)
				<= ctx->options->total_ops) ?
						ctx->options->burst_sz :
						ctx->options->total_ops -
						ops_enqd_total;

		uint16_t ops_needed = burst_size - ops_unused;

		/* Allocate crypto ops from pool */
		if (ops_needed != rte_crypto_op_bulk_alloc(
				ctx->crypto_op_pool,
				RTE_CRYPTO_OP_TYPE_SYMMETRIC,
				ops, ops_needed)) {
			rte_free(res);
			return -1;
		}

		/* Set up the crypto ops: attach mbufs, session, etc. */
		(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
				&ctx->mbufs_out[m_idx],
				ops_needed, ctx->sess, ctx->options,
				ctx->test_vector);

		if (ctx->options->verify) {
			for (i = 0; i < ops_needed; i++) {
				ops[i]->opaque_data = (void *)&res[idx];
				idx++;
			}
		}

#ifdef CPERF_LINEARIZATION_ENABLE
		if (linearize) {
			/* PMD doesn't support scatter-gather and the source
			 * buffer is segmented, so it must be linearized
			 * before enqueuing.
			 */
			for (i = 0; i < burst_size; i++)
				rte_pktmbuf_linearize(ops[i]->sym->m_src);
		}
#endif /* CPERF_LINEARIZATION_ENABLE */

		/* Enqueue burst of ops on crypto device */
		ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
				ops, burst_size);
		if (ops_enqd < burst_size)
			ops_enqd_failed++;

		/* Calculate number of ops not enqueued (mainly for hw
		 * accelerators whose ingress queue can fill up).
		 */
		ops_unused = burst_size - ops_enqd;
		ops_enqd_total += ops_enqd;

		/* Dequeue processed burst of ops from crypto device */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->burst_sz);

		if (likely(ops_deqd)) {
			if (ctx->options->verify) {
				void *opq;
				for (i = 0; i < ops_deqd; i++) {
					opq = (ops_processed[i]->opaque_data);
					pres = (struct cperf_op_result *)opq;
					pres->status = ops_processed[i]->status;
				}
			}

			/* Free crypto ops so they can be reused. The mbufs
			 * are deliberately not freed here: the crypto
			 * operation has modified their contents, so reusing
			 * them would cause verification failures.
			 */
			for (i = 0; i < ops_deqd; i++)
				rte_crypto_op_free(ops_processed[i]);

			ops_deqd_total += ops_deqd;
		} else {
			/* Count dequeue polls which didn't return any
			 * processed operations. This statistic is mainly
			 * relevant to hw accelerators.
			 */
			ops_deqd_failed++;
		}

		m_idx += ops_needed;
		m_idx = m_idx + ctx->options->burst_sz > ctx->options->pool_sz ?
				0 : m_idx;
	}

	/* Dequeue any operations still in the crypto device */
	while (ops_deqd_total < ctx->options->total_ops) {
		/* Sending 0 length burst to flush sw crypto device */
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

		/* dequeue burst */
		ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
				ops_processed, ctx->options->burst_sz);
		if (ops_deqd == 0) {
			ops_deqd_failed++;
			continue;
		}

		if (ctx->options->verify) {
			void *opq;
			for (i = 0; i < ops_deqd; i++) {
				opq = (ops_processed[i]->opaque_data);
				pres = (struct cperf_op_result *)opq;
				pres->status = ops_processed[i]->status;
			}
		}

		for (i = 0; i < ops_deqd; i++)
			rte_crypto_op_free(ops_processed[i]);

		ops_deqd_total += ops_deqd;
	}

	tsc_end = rte_rdtsc_precise();
	tsc_duration = (tsc_end - tsc_start);

	if (ctx->options->verify) {
		struct rte_mbuf **mbufs;

		if (ctx->options->out_of_place == 1)
			mbufs = ctx->mbufs_out;
		else
			mbufs = ctx->mbufs_in;

		/* Note: verification indexes the mbuf array by op number and
		 * therefore assumes total_ops does not exceed pool_sz.
		 */
		for (i = 0; i < ctx->options->total_ops; i++) {
			if (res[i].status != RTE_CRYPTO_OP_STATUS_SUCCESS ||
					cperf_throughput_test_verifier(
						mbufs[i], ctx->options,
						ctx->test_vector))
				ctx->results.ops_failed++;
		}

		rte_free(res);
	}

	/* Calculate average operations processed per second */
	ctx->results.ops_per_second = ((double)ctx->options->total_ops /
			tsc_duration) * rte_get_tsc_hz();

	/* Calculate average throughput (Gbps) in bits per second */
	ctx->results.throughput_gbps = ((ctx->results.ops_per_second *
			ctx->options->buffer_sz * 8) / 1000000000);

	/* Calculate average cycles per byte */
	ctx->results.cycles_per_byte = ((double)tsc_duration /
			ctx->options->total_ops) / ctx->options->buffer_sz;

	ctx->results.ops_enqueued = ops_enqd_total;
	ctx->results.ops_dequeued = ops_deqd_total;

	ctx->results.ops_enqueued_failed = ops_enqd_failed;
	ctx->results.ops_dequeued_failed = ops_deqd_failed;

	return 0;
}
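/*
 * Worked example of the result arithmetic above, with hypothetical numbers:
 * on a 2 GHz TSC, processing total_ops = 1,000,000 operations of
 * buffer_sz = 1024 bytes in tsc_duration = 1,000,000,000 cycles gives
 *
 *	ops_per_second  = (1e6 / 1e9) * 2e9     = 2,000,000 ops/s
 *	throughput_gbps = 2e6 * 1024 * 8 / 1e9 ~= 16.38 Gbps
 *	cycles_per_byte = (1e9 / 1e6) / 1024   ~= 0.98
 */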
void
cperf_throughput_test_destructor(void *arg)
{
	struct cperf_throughput_ctx *ctx = arg;
	struct cperf_throughput_results *results;
	static int only_once;

	if (ctx == NULL)
		return;

	results = &ctx->results;

	if (!ctx->options->csv) {
		printf("\n# Device %d on lcore %u\n",
				ctx->dev_id, ctx->lcore_id);
		printf("# Buffer Size(B)\t  Enqueued\t  Dequeued\tFailed Enq"
				"\tFailed Deq\tOps(Millions)\tThroughput(Gbps)"
				"\tCycles Per Byte\n");

		printf("\n%16u\t%10"PRIu64"\t%10"PRIu64"\t%10"PRIu64"\t"
				"%10"PRIu64"\t%16.4f\t%16.4f\t%15.2f\n",
				ctx->options->buffer_sz,
				results->ops_enqueued,
				results->ops_dequeued,
				results->ops_enqueued_failed,
				results->ops_dequeued_failed,
				results->ops_per_second / 1000000,
				results->throughput_gbps,
				results->cycles_per_byte);
	} else {
		if (!only_once)
			printf("\n# CPU lcore id;Burst Size;Buffer Size(B);"
					"Enqueued;Dequeued;Failed Enq;"
					"Failed Deq;Ops(Millions);"
					"Throughput(Gbps);Cycles Per Byte\n");
		only_once = 1;

		printf("%u;%u;%u;%"PRIu64";%"PRIu64";%"PRIu64";%"PRIu64";"
				"%.3f;%.3f;%.3f\n",
				ctx->lcore_id,
				ctx->options->burst_sz,
				ctx->options->buffer_sz,
				results->ops_enqueued,
				results->ops_dequeued,
				results->ops_enqueued_failed,
				results->ops_dequeued_failed,
				results->ops_per_second / 1000000,
				results->throughput_gbps,
				results->cycles_per_byte);
	}

	cperf_throughput_test_free(ctx, ctx->options->pool_sz);
}
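/*
 * Usage sketch: this test is selected with --ptest throughput in the
 * dpdk-test-crypto-perf application. Flag names vary between DPDK releases,
 * so treat the following invocation as illustrative only:
 *
 *	dpdk-test-crypto-perf -l 0,1 --vdev crypto_aesni_mb -- \
 *		--ptest throughput --optype cipher-only \
 *		--cipher-algo aes-cbc --cipher-op encrypt \
 *		--buffer-sz 1024 --burst-sz 32 --total-ops 1000000
 */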