1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. 5 * All rights reserved. 6 */ 7 8 #include "vbdev_crypto.h" 9 10 #include "spdk/env.h" 11 #include "spdk/likely.h" 12 #include "spdk/endian.h" 13 #include "spdk/thread.h" 14 #include "spdk/bdev_module.h" 15 #include "spdk/log.h" 16 17 #include <rte_config.h> 18 #include <rte_bus_vdev.h> 19 #include <rte_crypto.h> 20 #include <rte_cryptodev.h> 21 #include <rte_mbuf_dyn.h> 22 23 /* Used to store IO context in mbuf */ 24 static const struct rte_mbuf_dynfield rte_mbuf_dynfield_io_context = { 25 .name = "context_bdev_io", 26 .size = sizeof(uint64_t), 27 .align = __alignof__(uint64_t), 28 .flags = 0, 29 }; 30 static int g_mbuf_offset; 31 32 /* To add support for new device types, follow the examples of the following... 33 * Note that the string names are defined by the DPDK PMD in question so be 34 * sure to use the exact names. 35 */ 36 #define MAX_NUM_DRV_TYPES 3 37 38 /* The VF spread is the number of queue pairs between virtual functions, we use this to 39 * load balance the QAT device. 40 */ 41 #define QAT_VF_SPREAD 32 42 static uint8_t g_qat_total_qp = 0; 43 static uint8_t g_next_qat_index; 44 45 const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT, MLX5 }; 46 47 /* Global list of available crypto devices. */ 48 struct vbdev_dev { 49 struct rte_cryptodev_info cdev_info; /* includes device friendly name */ 50 uint8_t cdev_id; /* identifier for the device */ 51 TAILQ_ENTRY(vbdev_dev) link; 52 }; 53 static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs); 54 55 /* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD 56 * so that we can optimize per PMD where it make sense. For example, with QAT there an optimal 57 * pattern for assigning queue pairs where with AESNI there is not. 58 */ 59 struct device_qp { 60 struct vbdev_dev *device; /* ptr to crypto device */ 61 uint8_t qp; /* queue pair for this node */ 62 bool in_use; /* whether this node is in use or not */ 63 uint8_t index; /* used by QAT to load balance placement of qpairs */ 64 TAILQ_ENTRY(device_qp) link; 65 }; 66 static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat); 67 static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb); 68 static TAILQ_HEAD(, device_qp) g_device_qp_mlx5 = TAILQ_HEAD_INITIALIZER(g_device_qp_mlx5); 69 static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER; 70 71 72 /* In order to limit the number of resources we need to do one crypto 73 * operation per LBA (we use LBA as IV), we tell the bdev layer that 74 * our max IO size is something reasonable. Units here are in bytes. 75 */ 76 #define CRYPTO_MAX_IO (64 * 1024) 77 78 /* This controls how many ops will be dequeued from the crypto driver in one run 79 * of the poller. It is mainly a performance knob as it effectively determines how 80 * much work the poller has to do. However even that can vary between crypto drivers 81 * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the 82 * QAT driver just dequeues what has been completed already. 83 */ 84 #define MAX_DEQUEUE_BURST_SIZE 64 85 86 /* When enqueueing, we need to supply the crypto driver with an array of pointers to 87 * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO 88 * value in conjunction with the other defines to make sure we're not using crazy amounts 89 * of memory. All of these numbers can and probably should be adjusted based on the 90 * workload. By default we'll use the worst case (smallest) block size for the 91 * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B 92 * blocks would give us an enqueue array size of 128. 93 */ 94 #define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512) 95 96 /* The number of MBUFS we need must be a power of two and to support other small IOs 97 * in addition to the limits mentioned above, we go to the next power of two. It is 98 * big number because it is one mempool for source and destination mbufs. It may 99 * need to be bigger to support multiple crypto drivers at once. 100 */ 101 #define NUM_MBUFS 32768 102 #define POOL_CACHE_SIZE 256 103 #define MAX_CRYPTO_VOLUMES 128 104 #define NUM_SESSIONS (2 * MAX_CRYPTO_VOLUMES) 105 #define SESS_MEMPOOL_CACHE_SIZE 0 106 uint8_t g_number_of_claimed_volumes = 0; 107 108 /* This is the max number of IOs we can supply to any crypto device QP at one time. 109 * It can vary between drivers. 110 */ 111 #define CRYPTO_QP_DESCRIPTORS 2048 112 113 /* At this moment DPDK descriptors allocation for mlx5 has some issues. We use 512 114 * as an compromise value between performance and the time spent for initialization. */ 115 #define CRYPTO_QP_DESCRIPTORS_MLX5 512 116 117 #define AESNI_MB_NUM_QP 64 118 119 /* Common for suported devices. */ 120 #define DEFAULT_NUM_XFORMS 2 121 #define IV_OFFSET (sizeof(struct rte_crypto_op) + \ 122 sizeof(struct rte_crypto_sym_op) + \ 123 (DEFAULT_NUM_XFORMS * \ 124 sizeof(struct rte_crypto_sym_xform))) 125 #define IV_LENGTH 16 126 #define QUEUED_OP_OFFSET (IV_OFFSET + IV_LENGTH) 127 128 static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); 129 static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); 130 static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg); 131 static void vbdev_crypto_examine(struct spdk_bdev *bdev); 132 static int vbdev_crypto_claim(const char *bdev_name); 133 static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); 134 135 struct bdev_names { 136 struct vbdev_crypto_opts *opts; 137 TAILQ_ENTRY(bdev_names) link; 138 }; 139 140 /* List of crypto_bdev names and their base bdevs via configuration file. */ 141 static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names); 142 143 struct vbdev_crypto { 144 struct spdk_bdev *base_bdev; /* the thing we're attaching to */ 145 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 146 struct spdk_bdev crypto_bdev; /* the crypto virtual bdev */ 147 struct vbdev_crypto_opts *opts; /* crypto options such as key, cipher */ 148 uint32_t qp_desc_nr; /* number of qp descriptors */ 149 struct rte_cryptodev_sym_session *session_encrypt; /* encryption session for this bdev */ 150 struct rte_cryptodev_sym_session *session_decrypt; /* decryption session for this bdev */ 151 struct rte_crypto_sym_xform cipher_xform; /* crypto control struct for this bdev */ 152 TAILQ_ENTRY(vbdev_crypto) link; 153 struct spdk_thread *thread; /* thread where base device is opened */ 154 }; 155 156 /* List of virtual bdevs and associated info for each. We keep the device friendly name here even 157 * though its also in the device struct because we use it early on. 158 */ 159 static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto); 160 161 /* Shared mempools between all devices on this system */ 162 static struct rte_mempool *g_session_mp = NULL; 163 static struct rte_mempool *g_session_mp_priv = NULL; 164 static struct rte_mempool *g_mbuf_mp = NULL; /* mbuf mempool */ 165 static struct rte_mempool *g_crypto_op_mp = NULL; /* crypto operations, must be rte* mempool */ 166 167 static struct rte_mbuf_ext_shared_info g_shinfo = {}; /* used by DPDK mbuf macro */ 168 169 /* For queueing up crypto operations that we can't submit for some reason */ 170 struct vbdev_crypto_op { 171 uint8_t cdev_id; 172 uint8_t qp; 173 struct rte_crypto_op *crypto_op; 174 struct spdk_bdev_io *bdev_io; 175 TAILQ_ENTRY(vbdev_crypto_op) link; 176 }; 177 #define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op)) 178 179 /* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code. 180 * We store things in here that are needed on per thread basis like the base_channel for this thread, 181 * and the poller for this thread. 182 */ 183 struct crypto_io_channel { 184 struct spdk_io_channel *base_ch; /* IO channel of base device */ 185 struct spdk_poller *poller; /* completion poller */ 186 struct device_qp *device_qp; /* unique device/qp combination for this channel */ 187 TAILQ_HEAD(, spdk_bdev_io) pending_cry_ios; /* outstanding operations to the crypto device */ 188 struct spdk_io_channel_iter *iter; /* used with for_each_channel in reset */ 189 TAILQ_HEAD(, vbdev_crypto_op) queued_cry_ops; /* queued for re-submission to CryptoDev */ 190 }; 191 192 /* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to 193 * each IO for us. 194 */ 195 struct crypto_bdev_io { 196 int cryop_cnt_remaining; /* counter used when completing crypto ops */ 197 struct crypto_io_channel *crypto_ch; /* need to store for crypto completion handling */ 198 struct vbdev_crypto *crypto_bdev; /* the crypto node struct associated with this IO */ 199 struct spdk_bdev_io *orig_io; /* the original IO */ 200 struct spdk_bdev_io *read_io; /* the read IO we issued */ 201 int8_t bdev_io_status; /* the status we'll report back on the bdev IO */ 202 bool on_pending_list; 203 /* Used for the single contiguous buffer that serves as the crypto destination target for writes */ 204 uint64_t aux_num_blocks; /* num of blocks for the contiguous buffer */ 205 uint64_t aux_offset_blocks; /* block offset on media */ 206 void *aux_buf_raw; /* raw buffer that the bdev layer gave us for write buffer */ 207 struct iovec aux_buf_iov; /* iov representing aligned contig write buffer */ 208 209 /* for bdev_io_wait */ 210 struct spdk_bdev_io_wait_entry bdev_io_wait; 211 struct spdk_io_channel *ch; 212 }; 213 214 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */ 215 static int 216 create_vbdev_dev(uint8_t index, uint16_t num_lcores) 217 { 218 struct vbdev_dev *device; 219 uint8_t j, cdev_id, cdrv_id; 220 struct device_qp *dev_qp; 221 struct device_qp *tmp_qp; 222 uint32_t qp_desc_nr; 223 int rc; 224 TAILQ_HEAD(device_qps, device_qp) *dev_qp_head; 225 226 device = calloc(1, sizeof(struct vbdev_dev)); 227 if (!device) { 228 return -ENOMEM; 229 } 230 231 /* Get details about this device. */ 232 rte_cryptodev_info_get(index, &device->cdev_info); 233 cdrv_id = device->cdev_info.driver_id; 234 cdev_id = device->cdev_id = index; 235 236 /* QAT_ASYM devices are not supported at this time. */ 237 if (strcmp(device->cdev_info.driver_name, QAT_ASYM) == 0) { 238 free(device); 239 return 0; 240 } 241 242 /* Before going any further, make sure we have enough resources for this 243 * device type to function. We need a unique queue pair per core accross each 244 * device type to remain lockless.... 245 */ 246 if ((rte_cryptodev_device_count_by_driver(cdrv_id) * 247 device->cdev_info.max_nb_queue_pairs) < num_lcores) { 248 SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n", 249 device->cdev_info.driver_name); 250 SPDK_ERRLOG("Either add more crypto devices or decrease core count\n"); 251 rc = -EINVAL; 252 goto err; 253 } 254 255 /* Setup queue pairs. */ 256 struct rte_cryptodev_config conf = { 257 .nb_queue_pairs = device->cdev_info.max_nb_queue_pairs, 258 .socket_id = SPDK_ENV_SOCKET_ID_ANY 259 }; 260 261 rc = rte_cryptodev_configure(cdev_id, &conf); 262 if (rc < 0) { 263 SPDK_ERRLOG("Failed to configure cryptodev %u: error %d\n", 264 cdev_id, rc); 265 rc = -EINVAL; 266 goto err; 267 } 268 269 /* Select the right device/qp list based on driver name 270 * or error if it does not exist. 271 */ 272 if (strcmp(device->cdev_info.driver_name, QAT) == 0) { 273 dev_qp_head = (struct device_qps *)&g_device_qp_qat; 274 qp_desc_nr = CRYPTO_QP_DESCRIPTORS; 275 } else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) { 276 dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb; 277 qp_desc_nr = CRYPTO_QP_DESCRIPTORS; 278 } else if (strcmp(device->cdev_info.driver_name, MLX5) == 0) { 279 dev_qp_head = (struct device_qps *)&g_device_qp_mlx5; 280 qp_desc_nr = CRYPTO_QP_DESCRIPTORS_MLX5; 281 } else { 282 SPDK_ERRLOG("Failed to start device %u. Invalid driver name \"%s\"\n", 283 cdev_id, device->cdev_info.driver_name); 284 rc = -EINVAL; 285 goto err_qp_setup; 286 } 287 288 struct rte_cryptodev_qp_conf qp_conf = { 289 .nb_descriptors = qp_desc_nr, 290 .mp_session = g_session_mp, 291 .mp_session_private = g_session_mp_priv, 292 }; 293 294 /* Pre-setup all potential qpairs now and assign them in the channel 295 * callback. If we were to create them there, we'd have to stop the 296 * entire device affecting all other threads that might be using it 297 * even on other queue pairs. 298 */ 299 for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { 300 rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY); 301 if (rc < 0) { 302 SPDK_ERRLOG("Failed to setup queue pair %u on " 303 "cryptodev %u: error %d\n", j, cdev_id, rc); 304 rc = -EINVAL; 305 goto err_qp_setup; 306 } 307 } 308 309 rc = rte_cryptodev_start(cdev_id); 310 if (rc < 0) { 311 SPDK_ERRLOG("Failed to start device %u: error %d\n", 312 cdev_id, rc); 313 rc = -EINVAL; 314 goto err_dev_start; 315 } 316 317 /* Build up lists of device/qp combinations per PMD */ 318 for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) { 319 dev_qp = calloc(1, sizeof(struct device_qp)); 320 if (!dev_qp) { 321 rc = -ENOMEM; 322 goto err_qp_alloc; 323 } 324 dev_qp->device = device; 325 dev_qp->qp = j; 326 dev_qp->in_use = false; 327 if (strcmp(device->cdev_info.driver_name, QAT) == 0) { 328 g_qat_total_qp++; 329 } 330 TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link); 331 } 332 333 /* Add to our list of available crypto devices. */ 334 TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link); 335 336 return 0; 337 err_qp_alloc: 338 TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) { 339 if (dev_qp->device->cdev_id != device->cdev_id) { 340 continue; 341 } 342 TAILQ_REMOVE(dev_qp_head, dev_qp, link); 343 if (dev_qp_head == (struct device_qps *)&g_device_qp_qat) { 344 g_qat_total_qp--; 345 } 346 free(dev_qp); 347 } 348 rte_cryptodev_stop(cdev_id); 349 err_dev_start: 350 err_qp_setup: 351 rte_cryptodev_close(cdev_id); 352 err: 353 free(device); 354 355 return rc; 356 } 357 358 static void 359 release_vbdev_dev(struct vbdev_dev *device) 360 { 361 struct device_qp *dev_qp; 362 struct device_qp *tmp_qp; 363 TAILQ_HEAD(device_qps, device_qp) *dev_qp_head = NULL; 364 365 assert(device); 366 367 /* Select the right device/qp list based on driver name. */ 368 if (strcmp(device->cdev_info.driver_name, QAT) == 0) { 369 dev_qp_head = (struct device_qps *)&g_device_qp_qat; 370 } else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) { 371 dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb; 372 } else if (strcmp(device->cdev_info.driver_name, MLX5) == 0) { 373 dev_qp_head = (struct device_qps *)&g_device_qp_mlx5; 374 } 375 if (dev_qp_head) { 376 TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) { 377 /* Remove only qps of our device even if the driver names matches. */ 378 if (dev_qp->device->cdev_id != device->cdev_id) { 379 continue; 380 } 381 TAILQ_REMOVE(dev_qp_head, dev_qp, link); 382 if (dev_qp_head == (struct device_qps *)&g_device_qp_qat) { 383 g_qat_total_qp--; 384 } 385 free(dev_qp); 386 } 387 } 388 rte_cryptodev_stop(device->cdev_id); 389 rte_cryptodev_close(device->cdev_id); 390 free(device); 391 } 392 393 /* Dummy function used by DPDK to free ext attached buffers to mbufs, we free them ourselves but 394 * this callback has to be here. */ 395 static void 396 shinfo_free_cb(void *arg1, void *arg2) 397 { 398 } 399 400 /* This is called from the module's init function. We setup all crypto devices early on as we are unable 401 * to easily dynamically configure queue pairs after the drivers are up and running. So, here, we 402 * configure the max capabilities of each device and assign threads to queue pairs as channels are 403 * requested. 404 */ 405 static int 406 vbdev_crypto_init_crypto_drivers(void) 407 { 408 uint8_t cdev_count; 409 uint8_t cdev_id; 410 int i, rc; 411 struct vbdev_dev *device; 412 struct vbdev_dev *tmp_dev; 413 struct device_qp *dev_qp; 414 unsigned int max_sess_size = 0, sess_size; 415 uint16_t num_lcores = rte_lcore_count(); 416 char aesni_args[32]; 417 418 /* Only the first call, via RPC or module init should init the crypto drivers. */ 419 if (g_session_mp != NULL) { 420 return 0; 421 } 422 423 /* We always init AESNI_MB */ 424 snprintf(aesni_args, sizeof(aesni_args), "max_nb_queue_pairs=%d", AESNI_MB_NUM_QP); 425 rc = rte_vdev_init(AESNI_MB, aesni_args); 426 if (rc) { 427 SPDK_NOTICELOG("Failed to create virtual PMD %s: error %d. " 428 "Possibly %s is not supported by DPDK library. " 429 "Keep going...\n", AESNI_MB, rc, AESNI_MB); 430 } 431 432 /* If we have no crypto devices, there's no reason to continue. */ 433 cdev_count = rte_cryptodev_count(); 434 SPDK_NOTICELOG("Found crypto devices: %d\n", (int)cdev_count); 435 if (cdev_count == 0) { 436 return 0; 437 } 438 439 g_mbuf_offset = rte_mbuf_dynfield_register(&rte_mbuf_dynfield_io_context); 440 if (g_mbuf_offset < 0) { 441 SPDK_ERRLOG("error registering dynamic field with DPDK\n"); 442 return -EINVAL; 443 } 444 445 /* 446 * Create global mempools, shared by all devices regardless of type. 447 */ 448 449 /* First determine max session size, most pools are shared by all the devices, 450 * so we need to find the global max sessions size. 451 */ 452 for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) { 453 sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id); 454 if (sess_size > max_sess_size) { 455 max_sess_size = sess_size; 456 } 457 } 458 459 g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size, 460 SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL, 461 NULL, SOCKET_ID_ANY, 0); 462 if (g_session_mp_priv == NULL) { 463 SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size); 464 return -ENOMEM; 465 } 466 467 g_session_mp = rte_cryptodev_sym_session_pool_create( 468 "session_mp", 469 NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0, 470 SOCKET_ID_ANY); 471 if (g_session_mp == NULL) { 472 SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size); 473 rc = -ENOMEM; 474 goto error_create_session_mp; 475 } 476 477 g_mbuf_mp = rte_pktmbuf_pool_create("mbuf_mp", NUM_MBUFS, POOL_CACHE_SIZE, 478 0, 0, SPDK_ENV_SOCKET_ID_ANY); 479 if (g_mbuf_mp == NULL) { 480 SPDK_ERRLOG("Cannot create mbuf pool\n"); 481 rc = -ENOMEM; 482 goto error_create_mbuf; 483 } 484 485 /* We use per op private data as suggested by DPDK and to store the IV and 486 * our own struct for queueing ops. 487 */ 488 g_crypto_op_mp = rte_crypto_op_pool_create("op_mp", 489 RTE_CRYPTO_OP_TYPE_SYMMETRIC, 490 NUM_MBUFS, 491 POOL_CACHE_SIZE, 492 (DEFAULT_NUM_XFORMS * 493 sizeof(struct rte_crypto_sym_xform)) + 494 IV_LENGTH + QUEUED_OP_LENGTH, 495 rte_socket_id()); 496 497 if (g_crypto_op_mp == NULL) { 498 SPDK_ERRLOG("Cannot create op pool\n"); 499 rc = -ENOMEM; 500 goto error_create_op; 501 } 502 503 /* Init all devices */ 504 for (i = 0; i < cdev_count; i++) { 505 rc = create_vbdev_dev(i, num_lcores); 506 if (rc) { 507 goto err; 508 } 509 } 510 511 /* Assign index values to the QAT device qp nodes so that we can 512 * assign them for optimal performance. 513 */ 514 i = 0; 515 TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) { 516 dev_qp->index = i++; 517 } 518 519 g_shinfo.free_cb = shinfo_free_cb; 520 return 0; 521 522 /* Error cleanup paths. */ 523 err: 524 TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) { 525 TAILQ_REMOVE(&g_vbdev_devs, device, link); 526 release_vbdev_dev(device); 527 } 528 rte_mempool_free(g_crypto_op_mp); 529 g_crypto_op_mp = NULL; 530 error_create_op: 531 rte_mempool_free(g_mbuf_mp); 532 g_mbuf_mp = NULL; 533 error_create_mbuf: 534 rte_mempool_free(g_session_mp); 535 g_session_mp = NULL; 536 error_create_session_mp: 537 if (g_session_mp_priv != NULL) { 538 rte_mempool_free(g_session_mp_priv); 539 g_session_mp_priv = NULL; 540 } 541 return rc; 542 } 543 544 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish 545 * the read on decrypted data. Do that here. 546 */ 547 static void 548 _crypto_operation_complete(struct spdk_bdev_io *bdev_io) 549 { 550 struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, 551 crypto_bdev); 552 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 553 struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; 554 struct spdk_bdev_io *free_me = io_ctx->read_io; 555 int rc = 0; 556 557 /* Can also be called from the crypto_dev_poller() to fail the stuck re-enqueue ops IO. */ 558 if (io_ctx->on_pending_list) { 559 TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link); 560 io_ctx->on_pending_list = false; 561 } 562 563 if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { 564 565 /* Complete the original IO and then free the one that we created 566 * as a result of issuing an IO via submit_request. 567 */ 568 if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) { 569 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 570 } else { 571 SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io); 572 rc = -EINVAL; 573 } 574 spdk_bdev_free_io(free_me); 575 576 } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { 577 578 if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) { 579 /* Write the encrypted data. */ 580 rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, 581 &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks, 582 io_ctx->aux_num_blocks, _complete_internal_write, 583 bdev_io); 584 } else { 585 SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io); 586 rc = -EINVAL; 587 } 588 589 } else { 590 SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n", 591 bdev_io->type); 592 rc = -EINVAL; 593 } 594 595 if (rc) { 596 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 597 } 598 } 599 600 static void 601 cancel_queued_crypto_ops(struct crypto_io_channel *crypto_ch, struct spdk_bdev_io *bdev_io) 602 { 603 struct rte_mbuf *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE]; 604 struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE]; 605 struct vbdev_crypto_op *op_to_cancel, *tmp_op; 606 struct rte_crypto_op *crypto_op; 607 int num_mbufs, num_dequeued_ops; 608 609 /* Remove all ops from the failed IO. Since we don't know the 610 * order we have to check them all. */ 611 num_mbufs = 0; 612 num_dequeued_ops = 0; 613 TAILQ_FOREACH_SAFE(op_to_cancel, &crypto_ch->queued_cry_ops, link, tmp_op) { 614 /* Checking if this is our op. One IO contains multiple ops. */ 615 if (bdev_io == op_to_cancel->bdev_io) { 616 crypto_op = op_to_cancel->crypto_op; 617 TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_cancel, link); 618 619 /* Populating lists for freeing mbufs and ops. */ 620 mbufs_to_free[num_mbufs++] = (void *)crypto_op->sym->m_src; 621 if (crypto_op->sym->m_dst) { 622 mbufs_to_free[num_mbufs++] = (void *)crypto_op->sym->m_dst; 623 } 624 dequeued_ops[num_dequeued_ops++] = crypto_op; 625 } 626 } 627 628 /* Now bulk free both mbufs and crypto operations. */ 629 if (num_dequeued_ops > 0) { 630 rte_mempool_put_bulk(g_crypto_op_mp, (void **)dequeued_ops, 631 num_dequeued_ops); 632 assert(num_mbufs > 0); 633 /* This also releases chained mbufs if any. */ 634 rte_pktmbuf_free_bulk(mbufs_to_free, num_mbufs); 635 } 636 } 637 638 static int _crypto_operation(struct spdk_bdev_io *bdev_io, 639 enum rte_crypto_cipher_operation crypto_op, 640 void *aux_buf); 641 642 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at 643 * the device. Then we need to decide if what we've got so far (including previous poller 644 * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io 645 * accordingly. This means either completing a read or issuing a new write. 646 */ 647 static int 648 crypto_dev_poller(void *args) 649 { 650 struct crypto_io_channel *crypto_ch = args; 651 uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; 652 int i, num_dequeued_ops, num_enqueued_ops; 653 struct spdk_bdev_io *bdev_io = NULL; 654 struct crypto_bdev_io *io_ctx = NULL; 655 struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE]; 656 struct rte_mbuf *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE]; 657 int num_mbufs = 0; 658 struct vbdev_crypto_op *op_to_resubmit; 659 660 /* Each run of the poller will get just what the device has available 661 * at the moment we call it, we don't check again after draining the 662 * first batch. 663 */ 664 num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp, 665 dequeued_ops, MAX_DEQUEUE_BURST_SIZE); 666 667 /* Check if operation was processed successfully */ 668 for (i = 0; i < num_dequeued_ops; i++) { 669 670 /* We don't know the order or association of the crypto ops wrt any 671 * particular bdev_io so need to look at each and determine if it's 672 * the last one for it's bdev_io or not. 673 */ 674 bdev_io = (struct spdk_bdev_io *)*RTE_MBUF_DYNFIELD(dequeued_ops[i]->sym->m_src, g_mbuf_offset, 675 uint64_t *); 676 assert(bdev_io != NULL); 677 io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 678 679 if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { 680 SPDK_ERRLOG("error with op %d status %u\n", i, 681 dequeued_ops[i]->status); 682 /* Update the bdev status to error, we'll still process the 683 * rest of the crypto ops for this bdev_io though so they 684 * aren't left hanging. 685 */ 686 io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 687 } 688 689 assert(io_ctx->cryop_cnt_remaining > 0); 690 691 /* Return the associated src and dst mbufs by collecting them into 692 * an array that we can use the bulk API to free after the loop. 693 */ 694 *RTE_MBUF_DYNFIELD(dequeued_ops[i]->sym->m_src, g_mbuf_offset, uint64_t *) = 0; 695 mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src; 696 if (dequeued_ops[i]->sym->m_dst) { 697 mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst; 698 } 699 700 /* done encrypting, complete the bdev_io */ 701 if (--io_ctx->cryop_cnt_remaining == 0) { 702 703 /* If we're completing this with an outstanding reset we need 704 * to fail it. 705 */ 706 if (crypto_ch->iter) { 707 io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 708 } 709 710 /* Complete the IO */ 711 _crypto_operation_complete(bdev_io); 712 } 713 } 714 715 /* Now bulk free both mbufs and crypto operations. */ 716 if (num_dequeued_ops > 0) { 717 rte_mempool_put_bulk(g_crypto_op_mp, 718 (void **)dequeued_ops, 719 num_dequeued_ops); 720 assert(num_mbufs > 0); 721 /* This also releases chained mbufs if any. */ 722 rte_pktmbuf_free_bulk(mbufs_to_free, num_mbufs); 723 } 724 725 /* Check if there are any pending crypto ops to process */ 726 while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) { 727 op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops); 728 bdev_io = op_to_resubmit->bdev_io; 729 io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 730 num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id, 731 op_to_resubmit->qp, 732 &op_to_resubmit->crypto_op, 733 1); 734 if (num_enqueued_ops == 1) { 735 /* Make sure we don't put this on twice as one bdev_io is made up 736 * of many crypto ops. 737 */ 738 if (io_ctx->on_pending_list == false) { 739 TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link); 740 io_ctx->on_pending_list = true; 741 } 742 TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link); 743 } else { 744 if (op_to_resubmit->crypto_op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED) { 745 /* If we couldn't get one, just break and try again later. */ 746 break; 747 } else { 748 /* Something is really wrong with the op. Most probably the 749 * mbuf is broken or the HW is not able to process the request. 750 * Fail the IO and remove its ops from the queued ops list. */ 751 io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 752 753 cancel_queued_crypto_ops(crypto_ch, bdev_io); 754 755 /* Fail the IO if there is nothing left on device. */ 756 if (--io_ctx->cryop_cnt_remaining == 0) { 757 _crypto_operation_complete(bdev_io); 758 } 759 } 760 761 } 762 } 763 764 /* If the channel iter is not NULL, we need to continue to poll 765 * until the pending list is empty, then we can move on to the 766 * next channel. 767 */ 768 if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) { 769 SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch); 770 spdk_for_each_channel_continue(crypto_ch->iter, 0); 771 crypto_ch->iter = NULL; 772 } 773 774 return num_dequeued_ops; 775 } 776 777 /* Allocate the new mbuf of @remainder size with data pointed by @addr and attach 778 * it to the @orig_mbuf. */ 779 static int 780 mbuf_chain_remainder(struct spdk_bdev_io *bdev_io, struct rte_mbuf *orig_mbuf, 781 uint8_t *addr, uint32_t remainder) 782 { 783 uint64_t phys_addr, phys_len; 784 struct rte_mbuf *chain_mbuf; 785 int rc; 786 787 phys_len = remainder; 788 phys_addr = spdk_vtophys((void *)addr, &phys_len); 789 if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR || phys_len != remainder)) { 790 return -EFAULT; 791 } 792 rc = rte_pktmbuf_alloc_bulk(g_mbuf_mp, (struct rte_mbuf **)&chain_mbuf, 1); 793 if (spdk_unlikely(rc)) { 794 return -ENOMEM; 795 } 796 /* Store context in every mbuf as we don't know anything about completion order */ 797 *RTE_MBUF_DYNFIELD(chain_mbuf, g_mbuf_offset, uint64_t *) = (uint64_t)bdev_io; 798 rte_pktmbuf_attach_extbuf(chain_mbuf, addr, phys_addr, phys_len, &g_shinfo); 799 rte_pktmbuf_append(chain_mbuf, phys_len); 800 801 /* Chained buffer is released by rte_pktbuf_free_bulk() automagicaly. */ 802 rte_pktmbuf_chain(orig_mbuf, chain_mbuf); 803 return 0; 804 } 805 806 /* Attach data buffer pointed by @addr to @mbuf. Return utilized len of the 807 * contiguous space that was physically available. */ 808 static uint64_t 809 mbuf_attach_buf(struct spdk_bdev_io *bdev_io, struct rte_mbuf *mbuf, 810 uint8_t *addr, uint32_t len) 811 { 812 uint64_t phys_addr, phys_len; 813 814 /* Store context in every mbuf as we don't know anything about completion order */ 815 *RTE_MBUF_DYNFIELD(mbuf, g_mbuf_offset, uint64_t *) = (uint64_t)bdev_io; 816 817 phys_len = len; 818 phys_addr = spdk_vtophys((void *)addr, &phys_len); 819 if (spdk_unlikely(phys_addr == SPDK_VTOPHYS_ERROR || phys_len == 0)) { 820 return 0; 821 } 822 assert(phys_len <= len); 823 824 /* Set the mbuf elements address and length. */ 825 rte_pktmbuf_attach_extbuf(mbuf, addr, phys_addr, phys_len, &g_shinfo); 826 rte_pktmbuf_append(mbuf, phys_len); 827 828 return phys_len; 829 } 830 831 /* We're either encrypting on the way down or decrypting on the way back. */ 832 static int 833 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op, 834 void *aux_buf) 835 { 836 uint16_t num_enqueued_ops = 0; 837 uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks; 838 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 839 struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch; 840 uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id; 841 uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen; 842 uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len; 843 int rc; 844 uint32_t iov_index = 0; 845 uint32_t allocated = 0; 846 uint8_t *current_iov = NULL; 847 uint64_t total_remaining = 0; 848 uint64_t current_iov_remaining = 0; 849 uint32_t crypto_index = 0; 850 uint32_t en_offset = 0; 851 struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE]; 852 struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; 853 struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE]; 854 int burst; 855 struct vbdev_crypto_op *op_to_queue; 856 uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev); 857 858 assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO); 859 860 /* Get the number of source mbufs that we need. These will always be 1:1 because we 861 * don't support chaining. The reason we don't is because of our decision to use 862 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the 863 * op would be > 1 LBA. 864 */ 865 rc = rte_pktmbuf_alloc_bulk(g_mbuf_mp, src_mbufs, cryop_cnt); 866 if (rc) { 867 SPDK_ERRLOG("Failed to get src_mbufs!\n"); 868 return -ENOMEM; 869 } 870 871 /* Get the same amount but these buffers to describe the encrypted data location (dst). */ 872 if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { 873 rc = rte_pktmbuf_alloc_bulk(g_mbuf_mp, dst_mbufs, cryop_cnt); 874 if (rc) { 875 SPDK_ERRLOG("Failed to get dst_mbufs!\n"); 876 rc = -ENOMEM; 877 goto error_get_dst; 878 } 879 } 880 881 #ifdef __clang_analyzer__ 882 /* silence scan-build false positive */ 883 SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000); 884 #endif 885 /* Allocate crypto operations. */ 886 allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp, 887 RTE_CRYPTO_OP_TYPE_SYMMETRIC, 888 crypto_ops, cryop_cnt); 889 if (allocated < cryop_cnt) { 890 SPDK_ERRLOG("Failed to allocate crypto ops!\n"); 891 rc = -ENOMEM; 892 goto error_get_ops; 893 } 894 895 /* For encryption, we need to prepare a single contiguous buffer as the encryption 896 * destination, we'll then pass that along for the write after encryption is done. 897 * This is done to avoiding encrypting the provided write buffer which may be 898 * undesirable in some use cases. 899 */ 900 if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { 901 io_ctx->aux_buf_iov.iov_len = total_length; 902 io_ctx->aux_buf_raw = aux_buf; 903 io_ctx->aux_buf_iov.iov_base = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1)); 904 io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks; 905 io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks; 906 } 907 908 /* This value is used in the completion callback to determine when the bdev_io is 909 * complete. 910 */ 911 io_ctx->cryop_cnt_remaining = cryop_cnt; 912 913 /* As we don't support chaining because of a decision to use LBA as IV, construction 914 * of crypto operations is straightforward. We build both the op, the mbuf and the 915 * dst_mbuf in our local arrays by looping through the length of the bdev IO and 916 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each 917 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single 918 * mbuf per crypto operation. 919 */ 920 total_remaining = total_length; 921 current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; 922 current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; 923 do { 924 uint8_t *iv_ptr; 925 uint8_t *buf_addr; 926 uint64_t phys_len; 927 uint32_t remainder; 928 uint64_t op_block_offset; 929 930 phys_len = mbuf_attach_buf(bdev_io, src_mbufs[crypto_index], 931 current_iov, crypto_len); 932 if (spdk_unlikely(phys_len == 0)) { 933 goto error_attach_session; 934 rc = -EFAULT; 935 } 936 937 /* Handle the case of page boundary. */ 938 remainder = crypto_len - phys_len; 939 if (spdk_unlikely(remainder > 0)) { 940 rc = mbuf_chain_remainder(bdev_io, src_mbufs[crypto_index], 941 current_iov + phys_len, remainder); 942 if (spdk_unlikely(rc)) { 943 goto error_attach_session; 944 } 945 } 946 947 /* Set the IV - we use the LBA of the crypto_op */ 948 iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *, 949 IV_OFFSET); 950 memset(iv_ptr, 0, IV_LENGTH); 951 op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index; 952 rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t)); 953 954 /* Set the data to encrypt/decrypt length */ 955 crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len; 956 crypto_ops[crypto_index]->sym->cipher.data.offset = 0; 957 958 /* link the mbuf to the crypto op. */ 959 crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index]; 960 961 /* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io 962 * that will be used to process the write on completion to the same buffer. Setting 963 * up the en_buffer is a little simpler as we know the destination buffer is single IOV. 964 */ 965 if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { 966 buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset; 967 phys_len = mbuf_attach_buf(bdev_io, dst_mbufs[crypto_index], 968 buf_addr, crypto_len); 969 if (spdk_unlikely(phys_len == 0)) { 970 rc = -EFAULT; 971 goto error_attach_session; 972 } 973 974 crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index]; 975 en_offset += phys_len; 976 977 /* Handle the case of page boundary. */ 978 remainder = crypto_len - phys_len; 979 if (spdk_unlikely(remainder > 0)) { 980 rc = mbuf_chain_remainder(bdev_io, dst_mbufs[crypto_index], 981 buf_addr + phys_len, remainder); 982 if (spdk_unlikely(rc)) { 983 goto error_attach_session; 984 } 985 en_offset += remainder; 986 } 987 988 /* Attach the crypto session to the operation */ 989 rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], 990 io_ctx->crypto_bdev->session_encrypt); 991 if (rc) { 992 rc = -EINVAL; 993 goto error_attach_session; 994 } 995 } else { 996 crypto_ops[crypto_index]->sym->m_dst = NULL; 997 998 /* Attach the crypto session to the operation */ 999 rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index], 1000 io_ctx->crypto_bdev->session_decrypt); 1001 if (rc) { 1002 rc = -EINVAL; 1003 goto error_attach_session; 1004 } 1005 } 1006 1007 /* Subtract our running totals for the op in progress and the overall bdev io */ 1008 total_remaining -= crypto_len; 1009 current_iov_remaining -= crypto_len; 1010 1011 /* move our current IOV pointer accordingly. */ 1012 current_iov += crypto_len; 1013 1014 /* move on to the next crypto operation */ 1015 crypto_index++; 1016 1017 /* If we're done with this IOV, move to the next one. */ 1018 if (current_iov_remaining == 0 && total_remaining > 0) { 1019 iov_index++; 1020 current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base; 1021 current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len; 1022 } 1023 } while (total_remaining > 0); 1024 1025 /* Enqueue everything we've got but limit by the max number of descriptors we 1026 * configured the crypto device for. 1027 */ 1028 burst = spdk_min(cryop_cnt, io_ctx->crypto_bdev->qp_desc_nr); 1029 num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp, 1030 &crypto_ops[0], 1031 burst); 1032 1033 /* Add this bdev_io to our outstanding list if any of its crypto ops made it. */ 1034 if (num_enqueued_ops > 0) { 1035 TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link); 1036 io_ctx->on_pending_list = true; 1037 } 1038 /* We were unable to enqueue everything but did get some, so need to decide what 1039 * to do based on the status of the last op. 1040 */ 1041 if (num_enqueued_ops < cryop_cnt) { 1042 switch (crypto_ops[num_enqueued_ops]->status) { 1043 case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED: 1044 /* Queue them up on a linked list to be resubmitted via the poller. */ 1045 for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) { 1046 op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index], 1047 uint8_t *, QUEUED_OP_OFFSET); 1048 op_to_queue->cdev_id = cdev_id; 1049 op_to_queue->qp = crypto_ch->device_qp->qp; 1050 op_to_queue->crypto_op = crypto_ops[crypto_index]; 1051 op_to_queue->bdev_io = bdev_io; 1052 TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops, 1053 op_to_queue, 1054 link); 1055 } 1056 break; 1057 default: 1058 /* For all other statuses, set the io_ctx bdev_io status so that 1059 * the poller will pick the failure up for the overall bdev status. 1060 */ 1061 io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED; 1062 if (num_enqueued_ops == 0) { 1063 /* If nothing was enqueued, but the last one wasn't because of 1064 * busy, fail it now as the poller won't know anything about it. 1065 */ 1066 rc = -EINVAL; 1067 goto error_attach_session; 1068 } 1069 break; 1070 } 1071 } 1072 1073 return rc; 1074 1075 /* Error cleanup paths. */ 1076 error_attach_session: 1077 error_get_ops: 1078 if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) { 1079 /* This also releases chained mbufs if any. */ 1080 rte_pktmbuf_free_bulk(dst_mbufs, cryop_cnt); 1081 } 1082 if (allocated > 0) { 1083 rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, 1084 allocated); 1085 } 1086 error_get_dst: 1087 /* This also releases chained mbufs if any. */ 1088 rte_pktmbuf_free_bulk(src_mbufs, cryop_cnt); 1089 return rc; 1090 } 1091 1092 /* This function is called after all channels have been quiesced following 1093 * a bdev reset. 1094 */ 1095 static void 1096 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status) 1097 { 1098 struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i); 1099 1100 assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios)); 1101 assert(io_ctx->orig_io != NULL); 1102 1103 spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS); 1104 } 1105 1106 /* This function is called per channel to quiesce IOs before completing a 1107 * bdev reset that we received. 1108 */ 1109 static void 1110 _ch_quiesce(struct spdk_io_channel_iter *i) 1111 { 1112 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 1113 struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); 1114 1115 crypto_ch->iter = i; 1116 /* When the poller runs, it will see the non-NULL iter and handle 1117 * the quiesce. 1118 */ 1119 } 1120 1121 /* Completion callback for IO that were issued from this bdev other than read/write. 1122 * They have their own for readability. 1123 */ 1124 static void 1125 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1126 { 1127 struct spdk_bdev_io *orig_io = cb_arg; 1128 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 1129 1130 if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { 1131 struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; 1132 1133 assert(orig_io == orig_ctx->orig_io); 1134 1135 spdk_bdev_free_io(bdev_io); 1136 1137 spdk_for_each_channel(orig_ctx->crypto_bdev, 1138 _ch_quiesce, 1139 orig_ctx, 1140 _ch_quiesce_done); 1141 return; 1142 } 1143 1144 spdk_bdev_io_complete(orig_io, status); 1145 spdk_bdev_free_io(bdev_io); 1146 } 1147 1148 /* Completion callback for writes that were issued from this bdev. */ 1149 static void 1150 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1151 { 1152 struct spdk_bdev_io *orig_io = cb_arg; 1153 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 1154 struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; 1155 1156 spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw); 1157 1158 spdk_bdev_io_complete(orig_io, status); 1159 spdk_bdev_free_io(bdev_io); 1160 } 1161 1162 /* Completion callback for reads that were issued from this bdev. */ 1163 static void 1164 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1165 { 1166 struct spdk_bdev_io *orig_io = cb_arg; 1167 struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx; 1168 1169 if (success) { 1170 1171 /* Save off this bdev_io so it can be freed after decryption. */ 1172 orig_ctx->read_io = bdev_io; 1173 1174 if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) { 1175 return; 1176 } else { 1177 SPDK_ERRLOG("Failed to decrypt!\n"); 1178 } 1179 } else { 1180 SPDK_ERRLOG("Failed to read prior to decrypting!\n"); 1181 } 1182 1183 spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED); 1184 spdk_bdev_free_io(bdev_io); 1185 } 1186 1187 static void 1188 vbdev_crypto_resubmit_io(void *arg) 1189 { 1190 struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; 1191 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 1192 1193 vbdev_crypto_submit_request(io_ctx->ch, bdev_io); 1194 } 1195 1196 static void 1197 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io) 1198 { 1199 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 1200 int rc; 1201 1202 io_ctx->bdev_io_wait.bdev = bdev_io->bdev; 1203 io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io; 1204 io_ctx->bdev_io_wait.cb_arg = bdev_io; 1205 1206 rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->crypto_ch->base_ch, &io_ctx->bdev_io_wait); 1207 if (rc != 0) { 1208 SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc); 1209 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1210 } 1211 } 1212 1213 /* Callback for getting a buf from the bdev pool in the event that the caller passed 1214 * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module 1215 * beneath us before we're done with it. 1216 */ 1217 static void 1218 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 1219 bool success) 1220 { 1221 struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, 1222 crypto_bdev); 1223 struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); 1224 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 1225 int rc; 1226 1227 if (!success) { 1228 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1229 return; 1230 } 1231 1232 rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs, 1233 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 1234 bdev_io->u.bdev.num_blocks, _complete_internal_read, 1235 bdev_io); 1236 if (rc != 0) { 1237 if (rc == -ENOMEM) { 1238 SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n"); 1239 io_ctx->ch = ch; 1240 vbdev_crypto_queue_io(bdev_io); 1241 } else { 1242 SPDK_ERRLOG("Failed to submit bdev_io!\n"); 1243 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1244 } 1245 } 1246 } 1247 1248 /* For encryption we don't want to encrypt the data in place as the host isn't 1249 * expecting us to mangle its data buffers so we need to encrypt into the bdev 1250 * aux buffer, then we can use that as the source for the disk data transfer. 1251 */ 1252 static void 1253 crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, 1254 void *aux_buf) 1255 { 1256 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 1257 int rc = 0; 1258 1259 rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf); 1260 if (rc != 0) { 1261 spdk_bdev_io_put_aux_buf(bdev_io, aux_buf); 1262 if (rc == -ENOMEM) { 1263 SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n"); 1264 io_ctx->ch = ch; 1265 vbdev_crypto_queue_io(bdev_io); 1266 } else { 1267 SPDK_ERRLOG("Failed to submit bdev_io!\n"); 1268 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1269 } 1270 } 1271 } 1272 1273 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto, 1274 * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO 1275 * and call our cpl callback provided below along with the original bdev_io so that we can 1276 * complete it once this IO completes. For crypto operations, we'll either encrypt it first 1277 * (writes) then call back into bdev to submit it or we'll submit a read and then catch it 1278 * on the way back for decryption. 1279 */ 1280 static void 1281 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 1282 { 1283 struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto, 1284 crypto_bdev); 1285 struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch); 1286 struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx; 1287 int rc = 0; 1288 1289 memset(io_ctx, 0, sizeof(struct crypto_bdev_io)); 1290 io_ctx->crypto_bdev = crypto_bdev; 1291 io_ctx->crypto_ch = crypto_ch; 1292 io_ctx->orig_io = bdev_io; 1293 io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS; 1294 1295 switch (bdev_io->type) { 1296 case SPDK_BDEV_IO_TYPE_READ: 1297 spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb, 1298 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 1299 break; 1300 case SPDK_BDEV_IO_TYPE_WRITE: 1301 /* Tell the bdev layer that we need an aux buf in addition to the data 1302 * buf already associated with the bdev. 1303 */ 1304 spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb); 1305 break; 1306 case SPDK_BDEV_IO_TYPE_UNMAP: 1307 rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, 1308 bdev_io->u.bdev.offset_blocks, 1309 bdev_io->u.bdev.num_blocks, 1310 _complete_internal_io, bdev_io); 1311 break; 1312 case SPDK_BDEV_IO_TYPE_FLUSH: 1313 rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, 1314 bdev_io->u.bdev.offset_blocks, 1315 bdev_io->u.bdev.num_blocks, 1316 _complete_internal_io, bdev_io); 1317 break; 1318 case SPDK_BDEV_IO_TYPE_RESET: 1319 rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch, 1320 _complete_internal_io, bdev_io); 1321 break; 1322 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 1323 default: 1324 SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type); 1325 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1326 return; 1327 } 1328 1329 if (rc != 0) { 1330 if (rc == -ENOMEM) { 1331 SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n"); 1332 io_ctx->ch = ch; 1333 vbdev_crypto_queue_io(bdev_io); 1334 } else { 1335 SPDK_ERRLOG("Failed to submit bdev_io!\n"); 1336 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1337 } 1338 } 1339 } 1340 1341 /* We'll just call the base bdev and let it answer except for WZ command which 1342 * we always say we don't support so that the bdev layer will actually send us 1343 * real writes that we can encrypt. 1344 */ 1345 static bool 1346 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 1347 { 1348 struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; 1349 1350 switch (io_type) { 1351 case SPDK_BDEV_IO_TYPE_WRITE: 1352 case SPDK_BDEV_IO_TYPE_UNMAP: 1353 case SPDK_BDEV_IO_TYPE_RESET: 1354 case SPDK_BDEV_IO_TYPE_READ: 1355 case SPDK_BDEV_IO_TYPE_FLUSH: 1356 return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type); 1357 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 1358 /* Force the bdev layer to issue actual writes of zeroes so we can 1359 * encrypt them as regular writes. 1360 */ 1361 default: 1362 return false; 1363 } 1364 } 1365 1366 /* Callback for unregistering the IO device. */ 1367 static void 1368 _device_unregister_cb(void *io_device) 1369 { 1370 struct vbdev_crypto *crypto_bdev = io_device; 1371 1372 /* Done with this crypto_bdev. */ 1373 rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt); 1374 rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt); 1375 crypto_bdev->opts = NULL; 1376 free(crypto_bdev->crypto_bdev.name); 1377 free(crypto_bdev); 1378 } 1379 1380 /* Wrapper for the bdev close operation. */ 1381 static void 1382 _vbdev_crypto_destruct(void *ctx) 1383 { 1384 struct spdk_bdev_desc *desc = ctx; 1385 1386 spdk_bdev_close(desc); 1387 } 1388 1389 /* Called after we've unregistered following a hot remove callback. 1390 * Our finish entry point will be called next. 1391 */ 1392 static int 1393 vbdev_crypto_destruct(void *ctx) 1394 { 1395 struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; 1396 1397 /* Remove this device from the internal list */ 1398 TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link); 1399 1400 /* Unclaim the underlying bdev. */ 1401 spdk_bdev_module_release_bdev(crypto_bdev->base_bdev); 1402 1403 /* Close the underlying bdev on its same opened thread. */ 1404 if (crypto_bdev->thread && crypto_bdev->thread != spdk_get_thread()) { 1405 spdk_thread_send_msg(crypto_bdev->thread, _vbdev_crypto_destruct, crypto_bdev->base_desc); 1406 } else { 1407 spdk_bdev_close(crypto_bdev->base_desc); 1408 } 1409 1410 /* Unregister the io_device. */ 1411 spdk_io_device_unregister(crypto_bdev, _device_unregister_cb); 1412 1413 g_number_of_claimed_volumes--; 1414 1415 return 0; 1416 } 1417 1418 /* We supplied this as an entry point for upper layers who want to communicate to this 1419 * bdev. This is how they get a channel. We are passed the same context we provided when 1420 * we created our crypto vbdev in examine() which, for this bdev, is the address of one of 1421 * our context nodes. From here we'll ask the SPDK channel code to fill out our channel 1422 * struct and we'll keep it in our crypto node. 1423 */ 1424 static struct spdk_io_channel * 1425 vbdev_crypto_get_io_channel(void *ctx) 1426 { 1427 struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; 1428 1429 /* The IO channel code will allocate a channel for us which consists of 1430 * the SPDK channel structure plus the size of our crypto_io_channel struct 1431 * that we passed in when we registered our IO device. It will then call 1432 * our channel create callback to populate any elements that we need to 1433 * update. 1434 */ 1435 return spdk_get_io_channel(crypto_bdev); 1436 } 1437 1438 /* This is the output for bdev_get_bdevs() for this vbdev */ 1439 static int 1440 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 1441 { 1442 struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx; 1443 char *hexkey = NULL, *hexkey2 = NULL; 1444 int rc = 0; 1445 1446 hexkey = hexlify(crypto_bdev->opts->key, 1447 crypto_bdev->opts->key_size); 1448 if (!hexkey) { 1449 return -ENOMEM; 1450 } 1451 1452 if (crypto_bdev->opts->key2) { 1453 hexkey2 = hexlify(crypto_bdev->opts->key2, 1454 crypto_bdev->opts->key2_size); 1455 if (!hexkey2) { 1456 rc = -ENOMEM; 1457 goto out_err; 1458 } 1459 } 1460 1461 spdk_json_write_name(w, "crypto"); 1462 spdk_json_write_object_begin(w); 1463 spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); 1464 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); 1465 spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->opts->drv_name); 1466 spdk_json_write_named_string(w, "key", hexkey); 1467 if (hexkey2) { 1468 spdk_json_write_named_string(w, "key2", hexkey2); 1469 } 1470 spdk_json_write_named_string(w, "cipher", crypto_bdev->opts->cipher); 1471 spdk_json_write_object_end(w); 1472 out_err: 1473 if (hexkey) { 1474 memset(hexkey, 0, strlen(hexkey)); 1475 free(hexkey); 1476 } 1477 if (hexkey2) { 1478 memset(hexkey2, 0, strlen(hexkey2)); 1479 free(hexkey2); 1480 } 1481 return rc; 1482 } 1483 1484 static int 1485 vbdev_crypto_config_json(struct spdk_json_write_ctx *w) 1486 { 1487 struct vbdev_crypto *crypto_bdev; 1488 1489 TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) { 1490 char *hexkey = NULL, *hexkey2 = NULL; 1491 1492 hexkey = hexlify(crypto_bdev->opts->key, 1493 crypto_bdev->opts->key_size); 1494 if (!hexkey) { 1495 return -ENOMEM; 1496 } 1497 1498 if (crypto_bdev->opts->key2) { 1499 hexkey2 = hexlify(crypto_bdev->opts->key2, 1500 crypto_bdev->opts->key2_size); 1501 if (!hexkey2) { 1502 memset(hexkey, 0, strlen(hexkey)); 1503 free(hexkey); 1504 return -ENOMEM; 1505 } 1506 } 1507 1508 spdk_json_write_object_begin(w); 1509 spdk_json_write_named_string(w, "method", "bdev_crypto_create"); 1510 spdk_json_write_named_object_begin(w, "params"); 1511 spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev)); 1512 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev)); 1513 spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->opts->drv_name); 1514 spdk_json_write_named_string(w, "key", hexkey); 1515 if (hexkey2) { 1516 spdk_json_write_named_string(w, "key2", hexkey2); 1517 } 1518 spdk_json_write_named_string(w, "cipher", crypto_bdev->opts->cipher); 1519 spdk_json_write_object_end(w); 1520 spdk_json_write_object_end(w); 1521 1522 if (hexkey) { 1523 memset(hexkey, 0, strlen(hexkey)); 1524 free(hexkey); 1525 } 1526 if (hexkey2) { 1527 memset(hexkey2, 0, strlen(hexkey2)); 1528 free(hexkey2); 1529 } 1530 } 1531 return 0; 1532 } 1533 1534 /* Helper function for the channel creation callback. */ 1535 static void 1536 _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp, 1537 struct crypto_io_channel *crypto_ch) 1538 { 1539 pthread_mutex_lock(&g_device_qp_lock); 1540 if (strcmp(crypto_bdev->opts->drv_name, QAT) == 0) { 1541 /* For some QAT devices, the optimal qp to use is every 32nd as this spreads the 1542 * workload out over the multiple virtual functions in the device. For the devices 1543 * where this isn't the case, it doesn't hurt. 1544 */ 1545 TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) { 1546 if (device_qp->index != g_next_qat_index) { 1547 continue; 1548 } 1549 if (device_qp->in_use == false) { 1550 crypto_ch->device_qp = device_qp; 1551 device_qp->in_use = true; 1552 g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp; 1553 break; 1554 } else { 1555 /* if the preferred index is used, skip to the next one in this set. */ 1556 g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp; 1557 } 1558 } 1559 } else if (strcmp(crypto_bdev->opts->drv_name, AESNI_MB) == 0) { 1560 TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) { 1561 if (device_qp->in_use == false) { 1562 crypto_ch->device_qp = device_qp; 1563 device_qp->in_use = true; 1564 break; 1565 } 1566 } 1567 } else if (strcmp(crypto_bdev->opts->drv_name, MLX5) == 0) { 1568 TAILQ_FOREACH(device_qp, &g_device_qp_mlx5, link) { 1569 if (device_qp->in_use == false) { 1570 crypto_ch->device_qp = device_qp; 1571 device_qp->in_use = true; 1572 break; 1573 } 1574 } 1575 } 1576 pthread_mutex_unlock(&g_device_qp_lock); 1577 } 1578 1579 /* We provide this callback for the SPDK channel code to create a channel using 1580 * the channel struct we provided in our module get_io_channel() entry point. Here 1581 * we get and save off an underlying base channel of the device below us so that 1582 * we can communicate with the base bdev on a per channel basis. We also register the 1583 * poller used to complete crypto operations from the device. 1584 */ 1585 static int 1586 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf) 1587 { 1588 struct crypto_io_channel *crypto_ch = ctx_buf; 1589 struct vbdev_crypto *crypto_bdev = io_device; 1590 struct device_qp *device_qp = NULL; 1591 1592 crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc); 1593 crypto_ch->poller = SPDK_POLLER_REGISTER(crypto_dev_poller, crypto_ch, 0); 1594 crypto_ch->device_qp = NULL; 1595 1596 /* Assign a device/qp combination that is unique per channel per PMD. */ 1597 _assign_device_qp(crypto_bdev, device_qp, crypto_ch); 1598 assert(crypto_ch->device_qp); 1599 1600 /* We use this queue to track outstanding IO in our layer. */ 1601 TAILQ_INIT(&crypto_ch->pending_cry_ios); 1602 1603 /* We use this to queue up crypto ops when the device is busy. */ 1604 TAILQ_INIT(&crypto_ch->queued_cry_ops); 1605 1606 return 0; 1607 } 1608 1609 /* We provide this callback for the SPDK channel code to destroy a channel 1610 * created with our create callback. We just need to undo anything we did 1611 * when we created. 1612 */ 1613 static void 1614 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) 1615 { 1616 struct crypto_io_channel *crypto_ch = ctx_buf; 1617 1618 pthread_mutex_lock(&g_device_qp_lock); 1619 crypto_ch->device_qp->in_use = false; 1620 pthread_mutex_unlock(&g_device_qp_lock); 1621 1622 spdk_poller_unregister(&crypto_ch->poller); 1623 spdk_put_io_channel(crypto_ch->base_ch); 1624 } 1625 1626 /* Create the association from the bdev and vbdev name and insert 1627 * on the global list. */ 1628 static int 1629 vbdev_crypto_insert_name(struct vbdev_crypto_opts *opts, struct bdev_names **out) 1630 { 1631 struct bdev_names *name; 1632 bool found = false; 1633 int j; 1634 1635 assert(opts); 1636 assert(out); 1637 1638 TAILQ_FOREACH(name, &g_bdev_names, link) { 1639 if (strcmp(opts->vbdev_name, name->opts->vbdev_name) == 0) { 1640 SPDK_ERRLOG("Crypto bdev %s already exists\n", opts->vbdev_name); 1641 return -EEXIST; 1642 } 1643 } 1644 1645 for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) { 1646 if (strcmp(opts->drv_name, g_driver_names[j]) == 0) { 1647 found = true; 1648 break; 1649 } 1650 } 1651 if (!found) { 1652 SPDK_ERRLOG("Crypto PMD type %s is not supported.\n", opts->drv_name); 1653 return -EINVAL; 1654 } 1655 1656 name = calloc(1, sizeof(struct bdev_names)); 1657 if (!name) { 1658 SPDK_ERRLOG("Failed to allocate memory for bdev_names.\n"); 1659 return -ENOMEM; 1660 } 1661 1662 name->opts = opts; 1663 TAILQ_INSERT_TAIL(&g_bdev_names, name, link); 1664 *out = name; 1665 1666 return 0; 1667 } 1668 1669 void 1670 free_crypto_opts(struct vbdev_crypto_opts *opts) 1671 { 1672 free(opts->bdev_name); 1673 free(opts->vbdev_name); 1674 free(opts->drv_name); 1675 if (opts->xts_key) { 1676 memset(opts->xts_key, 0, 1677 opts->key_size + opts->key2_size); 1678 free(opts->xts_key); 1679 } 1680 memset(opts->key, 0, opts->key_size); 1681 free(opts->key); 1682 opts->key_size = 0; 1683 if (opts->key2) { 1684 memset(opts->key2, 0, opts->key2_size); 1685 free(opts->key2); 1686 } 1687 opts->key2_size = 0; 1688 free(opts); 1689 } 1690 1691 static void 1692 vbdev_crypto_delete_name(struct bdev_names *name) 1693 { 1694 TAILQ_REMOVE(&g_bdev_names, name, link); 1695 if (name->opts) { 1696 free_crypto_opts(name->opts); 1697 name->opts = NULL; 1698 } 1699 free(name); 1700 } 1701 1702 /* RPC entry point for crypto creation. */ 1703 int 1704 create_crypto_disk(struct vbdev_crypto_opts *opts) 1705 { 1706 struct bdev_names *name = NULL; 1707 int rc; 1708 1709 rc = vbdev_crypto_insert_name(opts, &name); 1710 if (rc) { 1711 return rc; 1712 } 1713 1714 rc = vbdev_crypto_claim(opts->bdev_name); 1715 if (rc == -ENODEV) { 1716 SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n"); 1717 rc = 0; 1718 } 1719 1720 if (rc) { 1721 assert(name != NULL); 1722 /* In case of error we let the caller function to deallocate @opts 1723 * since it is its responsibiltiy. Setting name->opts = NULL let's 1724 * vbdev_crypto_delete_name() know it does not have to do anything 1725 * about @opts. 1726 */ 1727 name->opts = NULL; 1728 vbdev_crypto_delete_name(name); 1729 } 1730 return rc; 1731 } 1732 1733 /* Called at driver init time, parses config file to prepare for examine calls, 1734 * also fully initializes the crypto drivers. 1735 */ 1736 static int 1737 vbdev_crypto_init(void) 1738 { 1739 int rc = 0; 1740 1741 /* Fully configure both SW and HW drivers. */ 1742 rc = vbdev_crypto_init_crypto_drivers(); 1743 if (rc) { 1744 SPDK_ERRLOG("Error setting up crypto devices\n"); 1745 } 1746 1747 return rc; 1748 } 1749 1750 /* Called when the entire module is being torn down. */ 1751 static void 1752 vbdev_crypto_finish(void) 1753 { 1754 struct bdev_names *name; 1755 struct vbdev_dev *device; 1756 1757 while ((name = TAILQ_FIRST(&g_bdev_names))) { 1758 vbdev_crypto_delete_name(name); 1759 } 1760 1761 while ((device = TAILQ_FIRST(&g_vbdev_devs))) { 1762 TAILQ_REMOVE(&g_vbdev_devs, device, link); 1763 release_vbdev_dev(device); 1764 } 1765 rte_vdev_uninit(AESNI_MB); 1766 1767 /* These are removed in release_vbdev_dev() */ 1768 assert(TAILQ_EMPTY(&g_device_qp_qat)); 1769 assert(TAILQ_EMPTY(&g_device_qp_aesni_mb)); 1770 assert(TAILQ_EMPTY(&g_device_qp_mlx5)); 1771 1772 rte_mempool_free(g_crypto_op_mp); 1773 rte_mempool_free(g_mbuf_mp); 1774 rte_mempool_free(g_session_mp); 1775 if (g_session_mp_priv != NULL) { 1776 rte_mempool_free(g_session_mp_priv); 1777 } 1778 } 1779 1780 /* During init we'll be asked how much memory we'd like passed to us 1781 * in bev_io structures as context. Here's where we specify how 1782 * much context we want per IO. 1783 */ 1784 static int 1785 vbdev_crypto_get_ctx_size(void) 1786 { 1787 return sizeof(struct crypto_bdev_io); 1788 } 1789 1790 static void 1791 vbdev_crypto_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find) 1792 { 1793 struct vbdev_crypto *crypto_bdev, *tmp; 1794 1795 TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) { 1796 if (bdev_find == crypto_bdev->base_bdev) { 1797 spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL); 1798 } 1799 } 1800 } 1801 1802 /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */ 1803 static void 1804 vbdev_crypto_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 1805 void *event_ctx) 1806 { 1807 switch (type) { 1808 case SPDK_BDEV_EVENT_REMOVE: 1809 vbdev_crypto_base_bdev_hotremove_cb(bdev); 1810 break; 1811 default: 1812 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 1813 break; 1814 } 1815 } 1816 1817 static void 1818 vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 1819 { 1820 /* No config per bdev needed */ 1821 } 1822 1823 /* When we register our bdev this is how we specify our entry points. */ 1824 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = { 1825 .destruct = vbdev_crypto_destruct, 1826 .submit_request = vbdev_crypto_submit_request, 1827 .io_type_supported = vbdev_crypto_io_type_supported, 1828 .get_io_channel = vbdev_crypto_get_io_channel, 1829 .dump_info_json = vbdev_crypto_dump_info_json, 1830 .write_config_json = vbdev_crypto_write_config_json 1831 }; 1832 1833 static struct spdk_bdev_module crypto_if = { 1834 .name = "crypto", 1835 .module_init = vbdev_crypto_init, 1836 .get_ctx_size = vbdev_crypto_get_ctx_size, 1837 .examine_config = vbdev_crypto_examine, 1838 .module_fini = vbdev_crypto_finish, 1839 .config_json = vbdev_crypto_config_json 1840 }; 1841 1842 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if) 1843 1844 static int 1845 vbdev_crypto_claim(const char *bdev_name) 1846 { 1847 struct bdev_names *name; 1848 struct vbdev_crypto *vbdev; 1849 struct vbdev_dev *device; 1850 struct spdk_bdev *bdev; 1851 bool found = false; 1852 uint8_t key_size; 1853 int rc = 0; 1854 1855 if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) { 1856 SPDK_DEBUGLOG(vbdev_crypto, "Reached max number of claimed volumes\n"); 1857 return -EINVAL; 1858 } 1859 g_number_of_claimed_volumes++; 1860 1861 /* Check our list of names from config versus this bdev and if 1862 * there's a match, create the crypto_bdev & bdev accordingly. 1863 */ 1864 TAILQ_FOREACH(name, &g_bdev_names, link) { 1865 if (strcmp(name->opts->bdev_name, bdev_name) != 0) { 1866 continue; 1867 } 1868 SPDK_DEBUGLOG(vbdev_crypto, "Match on %s\n", bdev_name); 1869 1870 vbdev = calloc(1, sizeof(struct vbdev_crypto)); 1871 if (!vbdev) { 1872 SPDK_ERRLOG("Failed to allocate memory for crypto_bdev.\n"); 1873 rc = -ENOMEM; 1874 goto error_vbdev_alloc; 1875 } 1876 vbdev->crypto_bdev.product_name = "crypto"; 1877 1878 vbdev->crypto_bdev.name = strdup(name->opts->vbdev_name); 1879 if (!vbdev->crypto_bdev.name) { 1880 SPDK_ERRLOG("Failed to allocate memory for crypto_bdev name.\n"); 1881 rc = -ENOMEM; 1882 goto error_bdev_name; 1883 } 1884 1885 rc = spdk_bdev_open_ext(bdev_name, true, vbdev_crypto_base_bdev_event_cb, 1886 NULL, &vbdev->base_desc); 1887 if (rc) { 1888 if (rc != -ENODEV) { 1889 SPDK_ERRLOG("Failed to open bdev %s: error %d\n", bdev_name, rc); 1890 } 1891 goto error_open; 1892 } 1893 1894 bdev = spdk_bdev_desc_get_bdev(vbdev->base_desc); 1895 vbdev->base_bdev = bdev; 1896 1897 if (strcmp(name->opts->drv_name, MLX5) == 0) { 1898 vbdev->qp_desc_nr = CRYPTO_QP_DESCRIPTORS_MLX5; 1899 } else { 1900 vbdev->qp_desc_nr = CRYPTO_QP_DESCRIPTORS; 1901 } 1902 1903 vbdev->crypto_bdev.write_cache = bdev->write_cache; 1904 if (strcmp(name->opts->drv_name, QAT) == 0) { 1905 vbdev->crypto_bdev.required_alignment = 1906 spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment); 1907 SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n", 1908 vbdev->crypto_bdev.required_alignment); 1909 SPDK_NOTICELOG("QAT using cipher: %s\n", name->opts->cipher); 1910 } else if (strcmp(name->opts->drv_name, MLX5) == 0) { 1911 vbdev->crypto_bdev.required_alignment = bdev->required_alignment; 1912 SPDK_NOTICELOG("MLX5 using cipher: %s\n", name->opts->cipher); 1913 } else { 1914 vbdev->crypto_bdev.required_alignment = bdev->required_alignment; 1915 SPDK_NOTICELOG("AESNI_MB using cipher: %s\n", name->opts->cipher); 1916 } 1917 vbdev->cipher_xform.cipher.iv.length = IV_LENGTH; 1918 1919 /* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is 1920 * in units of blocks. 1921 */ 1922 if (bdev->optimal_io_boundary > 0) { 1923 vbdev->crypto_bdev.optimal_io_boundary = 1924 spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary); 1925 } else { 1926 vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen); 1927 } 1928 vbdev->crypto_bdev.split_on_optimal_io_boundary = true; 1929 vbdev->crypto_bdev.blocklen = bdev->blocklen; 1930 vbdev->crypto_bdev.blockcnt = bdev->blockcnt; 1931 1932 /* This is the context that is passed to us when the bdev 1933 * layer calls in so we'll save our crypto_bdev node here. 1934 */ 1935 vbdev->crypto_bdev.ctxt = vbdev; 1936 vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table; 1937 vbdev->crypto_bdev.module = &crypto_if; 1938 1939 /* Assign crypto opts from the name. The pointer is valid up to the point 1940 * the module is unloaded and all names removed from the list. */ 1941 vbdev->opts = name->opts; 1942 1943 TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link); 1944 1945 spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb, 1946 sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name); 1947 1948 /* Save the thread where the base device is opened */ 1949 vbdev->thread = spdk_get_thread(); 1950 1951 rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module); 1952 if (rc) { 1953 SPDK_ERRLOG("Failed to claim bdev %s\n", spdk_bdev_get_name(bdev)); 1954 goto error_claim; 1955 } 1956 1957 /* To init the session we have to get the cryptoDev device ID for this vbdev */ 1958 TAILQ_FOREACH(device, &g_vbdev_devs, link) { 1959 if (strcmp(device->cdev_info.driver_name, vbdev->opts->drv_name) == 0) { 1960 found = true; 1961 break; 1962 } 1963 } 1964 if (found == false) { 1965 SPDK_ERRLOG("Failed to match crypto device driver to crypto vbdev.\n"); 1966 rc = -EINVAL; 1967 goto error_cant_find_devid; 1968 } 1969 1970 /* Get sessions. */ 1971 vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp); 1972 if (NULL == vbdev->session_encrypt) { 1973 SPDK_ERRLOG("Failed to create encrypt crypto session.\n"); 1974 rc = -EINVAL; 1975 goto error_session_en_create; 1976 } 1977 1978 vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp); 1979 if (NULL == vbdev->session_decrypt) { 1980 SPDK_ERRLOG("Failed to create decrypt crypto session.\n"); 1981 rc = -EINVAL; 1982 goto error_session_de_create; 1983 } 1984 1985 /* Init our per vbdev xform with the desired cipher options. */ 1986 vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; 1987 vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET; 1988 if (strcmp(vbdev->opts->cipher, AES_CBC) == 0) { 1989 vbdev->cipher_xform.cipher.key.data = vbdev->opts->key; 1990 vbdev->cipher_xform.cipher.key.length = vbdev->opts->key_size; 1991 vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; 1992 } else if (strcmp(vbdev->opts->cipher, AES_XTS) == 0) { 1993 key_size = vbdev->opts->key_size + vbdev->opts->key2_size; 1994 vbdev->cipher_xform.cipher.key.data = vbdev->opts->xts_key; 1995 vbdev->cipher_xform.cipher.key.length = key_size; 1996 vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_XTS; 1997 } else { 1998 SPDK_ERRLOG("Invalid cipher name %s.\n", vbdev->opts->cipher); 1999 rc = -EINVAL; 2000 goto error_session_de_create; 2001 } 2002 vbdev->cipher_xform.cipher.iv.length = IV_LENGTH; 2003 2004 vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; 2005 rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt, 2006 &vbdev->cipher_xform, 2007 g_session_mp_priv ? g_session_mp_priv : g_session_mp); 2008 if (rc < 0) { 2009 SPDK_ERRLOG("Failed to init encrypt session: error %d\n", rc); 2010 rc = -EINVAL; 2011 goto error_session_init; 2012 } 2013 2014 vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; 2015 rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt, 2016 &vbdev->cipher_xform, 2017 g_session_mp_priv ? g_session_mp_priv : g_session_mp); 2018 if (rc < 0) { 2019 SPDK_ERRLOG("Failed to init decrypt session: error %d\n", rc); 2020 rc = -EINVAL; 2021 goto error_session_init; 2022 } 2023 2024 rc = spdk_bdev_register(&vbdev->crypto_bdev); 2025 if (rc < 0) { 2026 SPDK_ERRLOG("Failed to register vbdev: error %d\n", rc); 2027 rc = -EINVAL; 2028 goto error_bdev_register; 2029 } 2030 SPDK_DEBUGLOG(vbdev_crypto, "Registered io_device and virtual bdev for: %s\n", 2031 vbdev->opts->vbdev_name); 2032 break; 2033 } 2034 2035 return rc; 2036 2037 /* Error cleanup paths. */ 2038 error_bdev_register: 2039 error_session_init: 2040 rte_cryptodev_sym_session_free(vbdev->session_decrypt); 2041 error_session_de_create: 2042 rte_cryptodev_sym_session_free(vbdev->session_encrypt); 2043 error_session_en_create: 2044 error_cant_find_devid: 2045 spdk_bdev_module_release_bdev(vbdev->base_bdev); 2046 error_claim: 2047 TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link); 2048 spdk_io_device_unregister(vbdev, NULL); 2049 spdk_bdev_close(vbdev->base_desc); 2050 error_open: 2051 free(vbdev->crypto_bdev.name); 2052 error_bdev_name: 2053 free(vbdev); 2054 error_vbdev_alloc: 2055 g_number_of_claimed_volumes--; 2056 return rc; 2057 } 2058 2059 /* RPC entry for deleting a crypto vbdev. */ 2060 void 2061 delete_crypto_disk(const char *bdev_name, spdk_delete_crypto_complete cb_fn, 2062 void *cb_arg) 2063 { 2064 struct bdev_names *name; 2065 int rc; 2066 2067 /* Some cleanup happens in the destruct callback. */ 2068 rc = spdk_bdev_unregister_by_name(bdev_name, &crypto_if, cb_fn, cb_arg); 2069 if (rc == 0) { 2070 /* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the 2071 * vbdev does not get re-created if the same bdev is constructed at some other time, 2072 * unless the underlying bdev was hot-removed. 2073 */ 2074 TAILQ_FOREACH(name, &g_bdev_names, link) { 2075 if (strcmp(name->opts->vbdev_name, bdev_name) == 0) { 2076 vbdev_crypto_delete_name(name); 2077 break; 2078 } 2079 } 2080 } else { 2081 cb_fn(cb_arg, rc); 2082 } 2083 } 2084 2085 /* Because we specified this function in our crypto bdev function table when we 2086 * registered our crypto bdev, we'll get this call anytime a new bdev shows up. 2087 * Here we need to decide if we care about it and if so what to do. We 2088 * parsed the config file at init so we check the new bdev against the list 2089 * we built up at that time and if the user configured us to attach to this 2090 * bdev, here's where we do it. 2091 */ 2092 static void 2093 vbdev_crypto_examine(struct spdk_bdev *bdev) 2094 { 2095 vbdev_crypto_claim(spdk_bdev_get_name(bdev)); 2096 spdk_bdev_module_examine_done(&crypto_if); 2097 } 2098 2099 SPDK_LOG_REGISTER_COMPONENT(vbdev_crypto) 2100