/*-
 * BSD LICENSE
 *
 * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_crypto.h>
#include <rte_cryptodev.h>

#include "cperf_test_throughput.h"
#include "cperf_ops.h"

struct cperf_throughput_ctx {
	uint8_t dev_id;
	uint16_t qp_id;
	uint8_t lcore_id;

	struct rte_mempool *pkt_mbuf_pool_in;
	struct rte_mempool *pkt_mbuf_pool_out;
	struct rte_mbuf **mbufs_in;
	struct rte_mbuf **mbufs_out;

	struct rte_mempool *crypto_op_pool;

	struct rte_cryptodev_sym_session *sess;

	cperf_populate_ops_t populate_ops;

	const struct cperf_options *options;
	const struct cperf_test_vector *test_vector;
};

static void
cperf_throughput_test_free(struct cperf_throughput_ctx *ctx, uint32_t mbuf_nb)
{
	uint32_t i;

	if (ctx) {
		if (ctx->sess) {
			rte_cryptodev_sym_session_clear(ctx->dev_id, ctx->sess);
			rte_cryptodev_sym_session_free(ctx->sess);
		}

		if (ctx->mbufs_in) {
			for (i = 0; i < mbuf_nb; i++)
				rte_pktmbuf_free(ctx->mbufs_in[i]);

			rte_free(ctx->mbufs_in);
		}

		if (ctx->mbufs_out) {
			for (i = 0; i < mbuf_nb; i++) {
				if (ctx->mbufs_out[i] != NULL)
					rte_pktmbuf_free(ctx->mbufs_out[i]);
			}

			rte_free(ctx->mbufs_out);
		}

		if (ctx->pkt_mbuf_pool_in)
			rte_mempool_free(ctx->pkt_mbuf_pool_in);

		if (ctx->pkt_mbuf_pool_out)
			rte_mempool_free(ctx->pkt_mbuf_pool_out);

		if (ctx->crypto_op_pool)
			rte_mempool_free(ctx->crypto_op_pool);

		rte_free(ctx);
	}
}
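/*
 * Build one test mbuf from the given pool: segments_nb segments of
 * max_buffer_size / segments_nb bytes each (any remainder goes on the
 * chain tail), filled with plaintext for encrypt operations and
 * ciphertext otherwise. Room for the digest is appended for every op
 * type except cipher-only, and the AAD is prepended for AEAD. Returns
 * NULL on any allocation failure.
 */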
static struct rte_mbuf *
cperf_mbuf_create(struct rte_mempool *mempool,
		uint32_t segments_nb,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector)
{
	struct rte_mbuf *mbuf;
	uint32_t segment_sz = options->max_buffer_size / segments_nb;
	uint32_t last_sz = options->max_buffer_size % segments_nb;
	uint8_t *mbuf_data;
	uint8_t *test_data =
			(options->cipher_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
					test_vector->plaintext.data :
					test_vector->ciphertext.data;

	mbuf = rte_pktmbuf_alloc(mempool);
	if (mbuf == NULL)
		goto error;

	mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
	if (mbuf_data == NULL)
		goto error;

	memcpy(mbuf_data, test_data, segment_sz);
	test_data += segment_sz;
	segments_nb--;

	while (segments_nb) {
		struct rte_mbuf *m;

		m = rte_pktmbuf_alloc(mempool);
		if (m == NULL)
			goto error;

		rte_pktmbuf_chain(mbuf, m);

		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, segment_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, segment_sz);
		test_data += segment_sz;
		segments_nb--;
	}

	if (last_sz) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf, last_sz);
		if (mbuf_data == NULL)
			goto error;

		memcpy(mbuf_data, test_data, last_sz);
	}

	if (options->op_type != CPERF_CIPHER_ONLY) {
		mbuf_data = (uint8_t *)rte_pktmbuf_append(mbuf,
				options->digest_sz);
		if (mbuf_data == NULL)
			goto error;
	}

	if (options->op_type == CPERF_AEAD) {
		uint8_t *aead = (uint8_t *)rte_pktmbuf_prepend(mbuf,
				RTE_ALIGN_CEIL(options->aead_aad_sz, 16));

		if (aead == NULL)
			goto error;

		memcpy(aead, test_vector->aad.data, test_vector->aad.length);
	}

	return mbuf;
error:
	if (mbuf != NULL)
		rte_pktmbuf_free(mbuf);

	return NULL;
}

void *
cperf_throughput_test_constructor(struct rte_mempool *sess_mp,
		uint8_t dev_id, uint16_t qp_id,
		const struct cperf_options *options,
		const struct cperf_test_vector *test_vector,
		const struct cperf_op_fns *op_fns)
{
	struct cperf_throughput_ctx *ctx = NULL;
	unsigned int mbuf_idx = 0;
	char pool_name[32] = "";

	/* Zero the context so that an early error path only frees what
	 * was actually allocated.
	 */
	ctx = rte_zmalloc(NULL, sizeof(struct cperf_throughput_ctx), 0);
	if (ctx == NULL)
		goto err;

	ctx->dev_id = dev_id;
	ctx->qp_id = qp_id;

	ctx->populate_ops = op_fns->populate_ops;
	ctx->options = options;
	ctx->test_vector = test_vector;

	/* IV goes at the end of the crypto operation (in the op private data) */
	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op);

	ctx->sess = op_fns->sess_create(sess_mp, dev_id, options, test_vector,
			iv_offset);
	if (ctx->sess == NULL)
		goto err;

	snprintf(pool_name, sizeof(pool_name), "cperf_pool_in_cdev_%d",
			dev_id);

	ctx->pkt_mbuf_pool_in = rte_pktmbuf_pool_create(pool_name,
			options->pool_sz * options->segments_nb, 0, 0,
			RTE_PKTMBUF_HEADROOM +
			RTE_CACHE_LINE_ROUNDUP(
				(options->max_buffer_size / options->segments_nb) +
				(options->max_buffer_size % options->segments_nb) +
				options->digest_sz),
			rte_socket_id());

	if (ctx->pkt_mbuf_pool_in == NULL)
		goto err;

	/* Generate mbufs_in with plaintext populated for test */
	ctx->mbufs_in = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) * ctx->options->pool_sz), 0);
	if (ctx->mbufs_in == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		ctx->mbufs_in[mbuf_idx] = cperf_mbuf_create(
				ctx->pkt_mbuf_pool_in, options->segments_nb,
				options, test_vector);
		if (ctx->mbufs_in[mbuf_idx] == NULL)
			goto err;
	}

	if (options->out_of_place == 1) {

		snprintf(pool_name, sizeof(pool_name), "cperf_pool_out_cdev_%d",
				dev_id);

		ctx->pkt_mbuf_pool_out = rte_pktmbuf_pool_create(
				pool_name, options->pool_sz, 0, 0,
				RTE_PKTMBUF_HEADROOM +
				RTE_CACHE_LINE_ROUNDUP(
					options->max_buffer_size +
					options->digest_sz),
				rte_socket_id());

		if (ctx->pkt_mbuf_pool_out == NULL)
			goto err;
	}

	ctx->mbufs_out = rte_malloc(NULL,
			(sizeof(struct rte_mbuf *) *
			ctx->options->pool_sz), 0);
	if (ctx->mbufs_out == NULL)
		goto err;

	for (mbuf_idx = 0; mbuf_idx < options->pool_sz; mbuf_idx++) {
		if (options->out_of_place == 1) {
			ctx->mbufs_out[mbuf_idx] = cperf_mbuf_create(
					ctx->pkt_mbuf_pool_out, 1,
					options, test_vector);
			if (ctx->mbufs_out[mbuf_idx] == NULL)
				goto err;
		} else {
			ctx->mbufs_out[mbuf_idx] = NULL;
		}
	}

	snprintf(pool_name, sizeof(pool_name), "cperf_op_pool_cdev_%d",
			dev_id);

	/* Op private data holds the per-op cipher and auth IVs */
	uint16_t priv_size = test_vector->cipher_iv.length +
		test_vector->auth_iv.length;

	ctx->crypto_op_pool = rte_crypto_op_pool_create(pool_name,
			RTE_CRYPTO_OP_TYPE_SYMMETRIC, options->pool_sz,
			512, priv_size, rte_socket_id());
	if (ctx->crypto_op_pool == NULL)
		goto err;

	return ctx;
err:
	cperf_throughput_test_free(ctx, mbuf_idx);

	return NULL;
}
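/*
 * Test body run on each crypto lcore: for every configured burst size,
 * push ctx->options->total_ops operations through the device queue pair,
 * timing the enqueue/dequeue loop with the TSC, then print one result
 * line per burst size.
 */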
int
cperf_throughput_test_runner(void *test_ctx)
{
	struct cperf_throughput_ctx *ctx = test_ctx;
	uint16_t test_burst_size;
	uint8_t burst_size_idx = 0;

	static int only_once;

	struct rte_crypto_op *ops[ctx->options->max_burst_size];
	struct rte_crypto_op *ops_processed[ctx->options->max_burst_size];
	uint64_t i;

	uint32_t lcore = rte_lcore_id();

#ifdef CPERF_LINEARIZATION_ENABLE
	struct rte_cryptodev_info dev_info;
	int linearize = 0;

	/* Check if source mbufs require coalescing */
	if (ctx->options->segments_nb > 1) {
		rte_cryptodev_info_get(ctx->dev_id, &dev_info);
		if ((dev_info.feature_flags &
				RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) == 0)
			linearize = 1;
	}
#endif /* CPERF_LINEARIZATION_ENABLE */

	ctx->lcore_id = lcore;

	/* Warm up the host CPU before starting the test */
	for (i = 0; i < ctx->options->total_ops; i++)
		rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

	/* Get first size from range or list */
	if (ctx->options->inc_burst_size != 0)
		test_burst_size = ctx->options->min_burst_size;
	else
		test_burst_size = ctx->options->burst_size_list[0];

	uint16_t iv_offset = sizeof(struct rte_crypto_op) +
		sizeof(struct rte_crypto_sym_op);
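	/*
	 * One pass of the loop below per burst size. Ops that a full
	 * device queue rejected stay at the tail of ops[], so each round
	 * only ops_needed fresh ops are allocated ahead of them and the
	 * whole burst is enqueued again.
	 */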
	while (test_burst_size <= ctx->options->max_burst_size) {
		uint64_t ops_enqd = 0, ops_enqd_total = 0, ops_enqd_failed = 0;
		uint64_t ops_deqd = 0, ops_deqd_total = 0, ops_deqd_failed = 0;

		uint64_t m_idx = 0, tsc_start, tsc_end, tsc_duration;

		uint16_t ops_unused = 0;

		tsc_start = rte_rdtsc_precise();

		while (ops_enqd_total < ctx->options->total_ops) {

			uint16_t burst_size = ((ops_enqd_total + test_burst_size)
					<= ctx->options->total_ops) ?
							test_burst_size :
							ctx->options->total_ops -
							ops_enqd_total;

			uint16_t ops_needed = burst_size - ops_unused;

			/* Allocate crypto ops from pool */
			if (ops_needed != rte_crypto_op_bulk_alloc(
					ctx->crypto_op_pool,
					RTE_CRYPTO_OP_TYPE_SYMMETRIC,
					ops, ops_needed)) {
				RTE_LOG(ERR, USER1,
					"Failed to allocate more crypto operations "
					"from the crypto operation pool.\n"
					"Consider increasing the pool size "
					"with --pool-sz\n");
				return -1;
			}

			/* Setup crypto op, attach mbuf etc */
			(ctx->populate_ops)(ops, &ctx->mbufs_in[m_idx],
					&ctx->mbufs_out[m_idx],
					ops_needed, ctx->sess, ctx->options,
					ctx->test_vector, iv_offset);

			/**
			 * When ops_needed is smaller than ops_enqd, the
			 * unused ops need to be moved to the front for
			 * next round use.
			 */
			if (unlikely(ops_enqd > ops_needed)) {
				size_t nb_b_to_mov = ops_unused * sizeof(
						struct rte_crypto_op *);

				memmove(&ops[ops_needed], &ops[ops_enqd],
						nb_b_to_mov);
			}

#ifdef CPERF_LINEARIZATION_ENABLE
			if (linearize) {
				/* PMD doesn't support scatter-gather and source buffer
				 * is segmented.
				 * We need to linearize it before enqueuing.
				 */
				for (i = 0; i < burst_size; i++)
					rte_pktmbuf_linearize(ops[i]->sym->m_src);
			}
#endif /* CPERF_LINEARIZATION_ENABLE */

			/* Enqueue burst of ops on crypto device */
			ops_enqd = rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id,
					ops, burst_size);
			if (ops_enqd < burst_size)
				ops_enqd_failed++;

			/**
			 * Calculate number of ops not enqueued (mainly for hw
			 * accelerators whose ingress queue can fill up).
			 */
			ops_unused = burst_size - ops_enqd;
			ops_enqd_total += ops_enqd;

			/* Dequeue processed burst of ops from crypto device */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);

			if (likely(ops_deqd)) {
				/* Free crypto ops so they can be reused. The
				 * mbufs are not freed here: they were pre-built
				 * by the constructor and are reused for later
				 * bursts.
				 */
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				ops_deqd_total += ops_deqd;
			} else {
				/**
				 * Count dequeue polls which didn't return any
				 * processed operations. This statistic is mainly
				 * relevant to hw accelerators.
				 */
				ops_deqd_failed++;
			}

			/* Advance through the mbuf arrays, wrapping when the
			 * next burst would run past the end of the pool.
			 */
			m_idx += ops_needed;
			m_idx = m_idx + test_burst_size > ctx->options->pool_sz ?
					0 : m_idx;
		}

		/* Dequeue any operations still in the crypto device */

		while (ops_deqd_total < ctx->options->total_ops) {
			/* Sending 0 length burst to flush sw crypto device */
			rte_cryptodev_enqueue_burst(ctx->dev_id, ctx->qp_id, NULL, 0);

			/* dequeue burst */
			ops_deqd = rte_cryptodev_dequeue_burst(ctx->dev_id, ctx->qp_id,
					ops_processed, test_burst_size);
			if (ops_deqd == 0)
				ops_deqd_failed++;
			else {
				rte_mempool_put_bulk(ctx->crypto_op_pool,
						(void **)ops_processed, ops_deqd);

				ops_deqd_total += ops_deqd;
			}
		}

		tsc_end = rte_rdtsc_precise();
		tsc_duration = (tsc_end - tsc_start);
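		/*
		 * Worked example for the conversions below (illustrative
		 * numbers only): with a 2 GHz TSC, 10^7 ops over 10^10
		 * cycles give (10^7 / 10^10) * 2*10^9 = 2 Mops/s; at a
		 * 1024-byte test buffer size that is
		 * 2*10^6 * 1024 * 8 / 10^9 = 16.384 Gbps.
		 */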
		/* Calculate average operations processed per second */
		double ops_per_second = ((double)ctx->options->total_ops /
				tsc_duration) * rte_get_tsc_hz();

		/* Calculate average throughput in Gbps */
		double throughput_gbps = ((ops_per_second *
				ctx->options->test_buffer_size * 8) / 1000000000);

		/* Calculate average cycles per packet */
		double cycles_per_packet = ((double)tsc_duration /
				ctx->options->total_ops);

		if (!ctx->options->csv) {
			if (!only_once)
				printf("%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s\n\n",
					"lcore id", "Buf Size", "Burst Size",
					"Enqueued", "Dequeued", "Failed Enq",
					"Failed Deq", "MOps", "Gbps",
					"Cycles/Buf");
			only_once = 1;

			printf("%12u%12u%12u%12"PRIu64"%12"PRIu64"%12"PRIu64
					"%12"PRIu64"%12.4f%12.4f%12.2f\n",
					ctx->lcore_id,
					ctx->options->test_buffer_size,
					test_burst_size,
					ops_enqd_total,
					ops_deqd_total,
					ops_enqd_failed,
					ops_deqd_failed,
					ops_per_second/1000000,
					throughput_gbps,
					cycles_per_packet);
		} else {
			if (!only_once)
				printf("# lcore id,Buffer Size(B),"
					"Burst Size,Enqueued,Dequeued,Failed Enq,"
					"Failed Deq,Ops(Millions),Throughput(Gbps),"
					"Cycles/Buf\n\n");
			only_once = 1;

			printf("%u,%u,%u,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64","
					"%.3f,%.3f,%.3f\n",
					ctx->lcore_id,
					ctx->options->test_buffer_size,
					test_burst_size,
					ops_enqd_total,
					ops_deqd_total,
					ops_enqd_failed,
					ops_deqd_failed,
					ops_per_second/1000000,
					throughput_gbps,
					cycles_per_packet);
		}

		/* Get next size from range or list */
		if (ctx->options->inc_burst_size != 0)
			test_burst_size += ctx->options->inc_burst_size;
		else {
			if (++burst_size_idx == ctx->options->burst_size_count)
				break;
			test_burst_size = ctx->options->burst_size_list[burst_size_idx];
		}

	}

	return 0;
}

void
cperf_throughput_test_destructor(void *arg)
{
	struct cperf_throughput_ctx *ctx = arg;

	if (ctx == NULL)
		return;

	rte_cryptodev_stop(ctx->dev_id);

	cperf_throughput_test_free(ctx, ctx->options->pool_sz);
}