xref: /spdk/module/bdev/crypto/vbdev_crypto.c (revision 48701bd9552cd4394cd2d3cbd1731e54dddaf2ed)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vbdev_crypto.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/endian.h"
38 #include "spdk/thread.h"
39 #include "spdk/bdev_module.h"
40 #include "spdk/log.h"
41 
42 #include <rte_config.h>
43 #include <rte_version.h>
44 #include <rte_bus_vdev.h>
45 #include <rte_crypto.h>
46 #include <rte_cryptodev.h>
47 #include <rte_cryptodev_pmd.h>
48 
49 /* To add support for new device types, follow the examples of the following...
50  * Note that the string names are defined by the DPDK PMD in question so be
51  * sure to use the exact names.
52  */
53 #define MAX_NUM_DRV_TYPES 2
54 
55 /* The VF spread is the number of queue pairs between virtual functions, we use this to
56  * load balance the QAT device.
57  */
58 #define QAT_VF_SPREAD 32
59 static uint8_t g_qat_total_qp = 0;
60 static uint8_t g_next_qat_index;
61 
62 const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
63 
64 /* Global list of available crypto devices. */
65 struct vbdev_dev {
66 	struct rte_cryptodev_info	cdev_info;	/* includes device friendly name */
67 	uint8_t				cdev_id;	/* identifier for the device */
68 	TAILQ_ENTRY(vbdev_dev)		link;
69 };
70 static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
71 
/* A unique device/queue pair combination. There is one global list of these
 * per supported PMD so that qp assignment can be optimized per PMD where it
 * makes sense: with QAT there is an optimal pattern for assigning queue
 * pairs, with AESNI there is not. All of the lists share a single lock.
 */
struct device_qp {
	/* ptr to crypto device */
	struct vbdev_dev		*device;
	/* queue pair for this node */
	uint8_t				qp;
	/* whether this node is in use or not */
	bool				in_use;
	/* used by QAT to load balance placement of qpairs */
	uint8_t				index;
	/* linkage on the per-PMD list */
	TAILQ_ENTRY(device_qp)		link;
};

static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb);
static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
86 
87 
/* Max IO size, in bytes, that we advertise to the bdev layer. One crypto
 * operation is needed per LBA (the LBA is used as the IV), so keeping this
 * reasonable bounds the resources a single IO can consume.
 */
#define CRYPTO_MAX_IO		(64 * 1024)

/* Upper bound on the ops dequeued from the crypto driver in one poller run.
 * Mostly a performance knob since it determines how much work the poller
 * does, although that differs per driver: AESNI_MB does all of the crypto
 * work on dequeue whereas QAT only hands back what has already completed.
 */
#define MAX_DEQUEUE_BURST_SIZE	64

/* Size of the pointer arrays handed to the crypto driver on enqueue. It is
 * derived from CRYPTO_MAX_IO using the worst case (smallest) block size of
 * 512B, e.g. a 64K CRYPTO_MAX_IO gives an array size of 128. These numbers
 * can, and probably should, be tuned for the workload.
 */
#define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)

/* Number of mbufs; must be a power of two. It is large because a single
 * mempool serves both source and destination mbufs, and to leave room for
 * other small IOs. It may need to grow to support multiple crypto drivers
 * at once. The crypto op pool is created with the same element count.
 */
#define NUM_MBUFS		32768
/* Per-core cache size of the crypto op pool. */
#define POOL_CACHE_SIZE		256
#define MAX_CRYPTO_VOLUMES	128
/* Element count of the session pools. */
#define NUM_SESSIONS		(2 * MAX_CRYPTO_VOLUMES)
/* Cache size of the session pools. */
#define SESS_MEMPOOL_CACHE_SIZE 0
/* NOTE(review): presumably counted against MAX_CRYPTO_VOLUMES when a volume
 * is claimed; the code that updates it is outside this part of the file.
 */
uint8_t g_number_of_claimed_volumes = 0;

/* Max number of IOs that can be supplied to any crypto device QP at one
 * time; can vary between drivers.
 */
#define CRYPTO_QP_DESCRIPTORS	2048

/* Cipher specific lengths. XTS uses 2 keys, each of AES_XTS_KEY_LENGTH. */
#define AES_CBC_IV_LENGTH	16
#define AES_CBC_KEY_LENGTH	16
#define AES_XTS_KEY_LENGTH	16
/* Number of queue pairs requested from the AESNI_MB virtual PMD. */
#define AESNI_MB_NUM_QP		64

/* Common for supported devices: the per-op private area starts right after
 * the op and sym op structs. The IV comes first, followed by the struct we
 * use for queueing ops.
 */
#define IV_OFFSET            (sizeof(struct rte_crypto_op) + \
				sizeof(struct rte_crypto_sym_op))
#define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH)
139 
/* Prototypes for file-local functions that are referenced before they are defined. */
static int vbdev_crypto_claim(const char *bdev_name);
static void vbdev_crypto_examine(struct spdk_bdev *bdev);
static void vbdev_crypto_submit_request(struct spdk_io_channel *ch,
					struct spdk_bdev_io *bdev_io);
static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success,
				  void *cb_arg);
static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success,
				    void *cb_arg);
static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success,
				     void *cb_arg);
146 
/* One configured crypto vbdev: its name, the base bdev it sits on and the
 * crypto parameters to use. Entries come from the configuration file and are
 * kept on the global g_bdev_names list.
 *
 * Note, for dev/test we allow use of key in the config file, for production
 * use, you must use an RPC to specify the key for security reasons.
 */
struct bdev_names {
	/* name of the vbdev to create */
	char			*vbdev_name;
	/* base bdev name */
	char			*bdev_name;
	/* key per bdev */
	uint8_t			*key;
	/* name of the crypto device driver */
	char			*drv_name;
	/* AES_CBC or AES_XTS */
	char			*cipher;
	/* key #2 for AES_XTS, per bdev */
	uint8_t			*key2;
	TAILQ_ENTRY(bdev_names)	link;
};

static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
162 
163 /* List of virtual bdevs and associated info for each. We keep the device friendly name here even
164  * though its also in the device struct because we use it early on.
165  */
166 struct vbdev_crypto {
167 	struct spdk_bdev		*base_bdev;		/* the thing we're attaching to */
168 	struct spdk_bdev_desc		*base_desc;		/* its descriptor we get from open */
169 	struct spdk_bdev		crypto_bdev;		/* the crypto virtual bdev */
170 	uint8_t				*key;			/* key per bdev */
171 	uint8_t				*key2;			/* for XTS */
172 	uint8_t				*xts_key;		/* key + key 2 */
173 	char				*drv_name;		/* name of the crypto device driver */
174 	char				*cipher;		/* cipher used */
175 	struct rte_cryptodev_sym_session *session_encrypt;	/* encryption session for this bdev */
176 	struct rte_cryptodev_sym_session *session_decrypt;	/* decryption session for this bdev */
177 	struct rte_crypto_sym_xform	cipher_xform;		/* crypto control struct for this bdev */
178 	TAILQ_ENTRY(vbdev_crypto)	link;
179 	struct spdk_thread		*thread;		/* thread where base device is opened */
180 };
181 static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
182 
/* Mempools shared between all devices on this system. They are created once
 * by vbdev_crypto_init_crypto_drivers(); a non-NULL g_session_mp is what that
 * function uses to detect that initialization already happened.
 */
/* session pool */
static struct rte_mempool *g_session_mp = NULL;
/* session private data pool, sized for the largest private session */
static struct rte_mempool *g_session_mp_priv = NULL;
/* mbuf mempool */
static struct spdk_mempool *g_mbuf_mp = NULL;
/* crypto operations, must be rte* mempool */
static struct rte_mempool *g_crypto_op_mp = NULL;
188 
/* Bookkeeping for a crypto operation that could not be submitted and has to
 * be retried; the poller re-enqueues these from the channel's queued_cry_ops
 * list.
 */
struct vbdev_crypto_op {
	/* device to enqueue the op on */
	uint8_t					cdev_id;
	/* queue pair to enqueue the op on */
	uint8_t					qp;
	/* the op awaiting submission */
	struct rte_crypto_op			*crypto_op;
	/* the bdev_io that the op belongs to */
	struct spdk_bdev_io			*bdev_io;
	TAILQ_ENTRY(vbdev_crypto_op)		link;
};

/* Room reserved for the struct above in each op's private data. */
#define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op))
198 
/* Per-thread state of a crypto vbdev, such as the base channel and the
 * poller for this thread. The io channel code allocates and frees this
 * struct on our behalf.
 */
struct crypto_io_channel {
	/* IO channel of base device */
	struct spdk_io_channel		*base_ch;
	/* completion poller */
	struct spdk_poller		*poller;
	/* unique device/qp combination for this channel */
	struct device_qp		*device_qp;
	/* outstanding operations to the crypto device */
	TAILQ_HEAD(, spdk_bdev_io)	pending_cry_ios;
	/* used with for_each_channel in reset */
	struct spdk_io_channel_iter	*iter;
	/* queued for re-submission to CryptoDev */
	TAILQ_HEAD(, vbdev_crypto_op)	queued_cry_ops;
};
211 
212 /* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
213  * each IO for us.
214  */
215 struct crypto_bdev_io {
216 	int cryop_cnt_remaining;			/* counter used when completing crypto ops */
217 	struct crypto_io_channel *crypto_ch;		/* need to store for crypto completion handling */
218 	struct vbdev_crypto *crypto_bdev;		/* the crypto node struct associated with this IO */
219 	struct spdk_bdev_io *orig_io;			/* the original IO */
220 	struct spdk_bdev_io *read_io;			/* the read IO we issued */
221 	int8_t bdev_io_status;				/* the status we'll report back on the bdev IO */
222 	bool on_pending_list;
223 	/* Used for the single contiguous buffer that serves as the crypto destination target for writes */
224 	uint64_t aux_num_blocks;			/* num of blocks for the contiguous buffer */
225 	uint64_t aux_offset_blocks;			/* block offset on media */
226 	void *aux_buf_raw;				/* raw buffer that the bdev layer gave us for write buffer */
227 	struct iovec aux_buf_iov;			/* iov representing aligned contig write buffer */
228 
229 	/* for bdev_io_wait */
230 	struct spdk_bdev_io_wait_entry bdev_io_wait;
231 	struct spdk_io_channel *ch;
232 };
233 
234 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */
235 static int
236 create_vbdev_dev(uint8_t index, uint16_t num_lcores)
237 {
238 	struct vbdev_dev *device;
239 	uint8_t j, cdev_id, cdrv_id;
240 	struct device_qp *dev_qp;
241 	struct device_qp *tmp_qp;
242 	int rc;
243 	TAILQ_HEAD(device_qps, device_qp) *dev_qp_head;
244 
245 	device = calloc(1, sizeof(struct vbdev_dev));
246 	if (!device) {
247 		return -ENOMEM;
248 	}
249 
250 	/* Get details about this device. */
251 	rte_cryptodev_info_get(index, &device->cdev_info);
252 	cdrv_id = device->cdev_info.driver_id;
253 	cdev_id = device->cdev_id = index;
254 
255 	/* Before going any further, make sure we have enough resources for this
256 	 * device type to function.  We need a unique queue pair per core accross each
257 	 * device type to remain lockless....
258 	 */
259 	if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
260 	     device->cdev_info.max_nb_queue_pairs) < num_lcores) {
261 		SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
262 			    device->cdev_info.driver_name);
263 		SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
264 		rc = -EINVAL;
265 		goto err;
266 	}
267 
268 	/* Setup queue pairs. */
269 	struct rte_cryptodev_config conf = {
270 		.nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
271 		.socket_id = SPDK_ENV_SOCKET_ID_ANY
272 	};
273 
274 	rc = rte_cryptodev_configure(cdev_id, &conf);
275 	if (rc < 0) {
276 		SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id);
277 		rc = -EINVAL;
278 		goto err;
279 	}
280 
281 	struct rte_cryptodev_qp_conf qp_conf = {
282 		.nb_descriptors = CRYPTO_QP_DESCRIPTORS,
283 		.mp_session = g_session_mp,
284 		.mp_session_private = g_session_mp_priv,
285 	};
286 
287 	/* Pre-setup all potential qpairs now and assign them in the channel
288 	 * callback. If we were to create them there, we'd have to stop the
289 	 * entire device affecting all other threads that might be using it
290 	 * even on other queue pairs.
291 	 */
292 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
293 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY);
294 		if (rc < 0) {
295 			SPDK_ERRLOG("Failed to setup queue pair %u on "
296 				    "cryptodev %u\n", j, cdev_id);
297 			rc = -EINVAL;
298 			goto err;
299 		}
300 	}
301 
302 	rc = rte_cryptodev_start(cdev_id);
303 	if (rc < 0) {
304 		SPDK_ERRLOG("Failed to start device %u: error %d\n",
305 			    cdev_id, rc);
306 		rc = -EINVAL;
307 		goto err;
308 	}
309 
310 	/* Select the right device/qp list based on driver name
311 	 * or error if it does not exist.
312 	 */
313 	if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
314 		dev_qp_head = (struct device_qps *)&g_device_qp_qat;
315 	} else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) {
316 		dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb;
317 	} else {
318 		rc = -EINVAL;
319 		goto err;
320 	}
321 
322 	/* Build up lists of device/qp combinations per PMD */
323 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
324 		dev_qp = calloc(1, sizeof(struct device_qp));
325 		if (!dev_qp) {
326 			rc = -ENOMEM;
327 			goto err_qp_alloc;
328 		}
329 		dev_qp->device = device;
330 		dev_qp->qp = j;
331 		dev_qp->in_use = false;
332 		if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
333 			g_qat_total_qp++;
334 		}
335 		TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
336 	}
337 
338 	/* Add to our list of available crypto devices. */
339 	TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
340 
341 	return 0;
342 err_qp_alloc:
343 	TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) {
344 		TAILQ_REMOVE(dev_qp_head, dev_qp, link);
345 		free(dev_qp);
346 	}
347 err:
348 	free(device);
349 
350 	return rc;
351 }
352 
353 /* This is called from the module's init function. We setup all crypto devices early on as we are unable
354  * to easily dynamically configure queue pairs after the drivers are up and running.  So, here, we
355  * configure the max capabilities of each device and assign threads to queue pairs as channels are
356  * requested.
357  */
358 static int
359 vbdev_crypto_init_crypto_drivers(void)
360 {
361 	uint8_t cdev_count;
362 	uint8_t cdev_id;
363 	int i, rc = 0;
364 	struct vbdev_dev *device;
365 	struct vbdev_dev *tmp_dev;
366 	struct device_qp *dev_qp;
367 	unsigned int max_sess_size = 0, sess_size;
368 	uint16_t num_lcores = rte_lcore_count();
369 	char aesni_args[32];
370 
371 	/* Only the first call, via RPC or module init should init the crypto drivers. */
372 	if (g_session_mp != NULL) {
373 		return 0;
374 	}
375 
376 	/* We always init AESNI_MB */
377 	snprintf(aesni_args, sizeof(aesni_args), "max_nb_queue_pairs=%d", AESNI_MB_NUM_QP);
378 	rc = rte_vdev_init(AESNI_MB, aesni_args);
379 	if (rc) {
380 		SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
381 		return -EINVAL;
382 	}
383 
384 	/* If we have no crypto devices, there's no reason to continue. */
385 	cdev_count = rte_cryptodev_count();
386 	if (cdev_count == 0) {
387 		return 0;
388 	}
389 
390 	/*
391 	 * Create global mempools, shared by all devices regardless of type.
392 	 */
393 
394 	/* First determine max session size, most pools are shared by all the devices,
395 	 * so we need to find the global max sessions size.
396 	 */
397 	for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
398 		sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
399 		if (sess_size > max_sess_size) {
400 			max_sess_size = sess_size;
401 		}
402 	}
403 
404 	g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size,
405 					       SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL,
406 					       NULL, SOCKET_ID_ANY, 0);
407 	if (g_session_mp_priv == NULL) {
408 		SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size);
409 		return -ENOMEM;
410 	}
411 
412 	g_session_mp = rte_cryptodev_sym_session_pool_create(
413 			       "session_mp",
414 			       NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0,
415 			       SOCKET_ID_ANY);
416 	if (g_session_mp == NULL) {
417 		SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
418 		goto error_create_session_mp;
419 		return -ENOMEM;
420 	}
421 
422 	g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
423 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
424 					SPDK_ENV_SOCKET_ID_ANY);
425 	if (g_mbuf_mp == NULL) {
426 		SPDK_ERRLOG("Cannot create mbuf pool\n");
427 		rc = -ENOMEM;
428 		goto error_create_mbuf;
429 	}
430 
431 	/* We use per op private data to store the IV and our own struct
432 	 * for queueing ops.
433 	 */
434 	g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
435 			 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
436 			 NUM_MBUFS,
437 			 POOL_CACHE_SIZE,
438 			 AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH,
439 			 rte_socket_id());
440 
441 	if (g_crypto_op_mp == NULL) {
442 		SPDK_ERRLOG("Cannot create op pool\n");
443 		rc = -ENOMEM;
444 		goto error_create_op;
445 	}
446 
447 	/* Init all devices */
448 	for (i = 0; i < cdev_count; i++) {
449 		rc = create_vbdev_dev(i, num_lcores);
450 		if (rc) {
451 			goto err;
452 		}
453 	}
454 
455 	/* Assign index values to the QAT device qp nodes so that we can
456 	 * assign them for optimal performance.
457 	 */
458 	i = 0;
459 	TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
460 		dev_qp->index = i++;
461 	}
462 
463 	return 0;
464 
465 	/* Error cleanup paths. */
466 err:
467 	TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) {
468 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
469 		free(device);
470 	}
471 	rte_mempool_free(g_crypto_op_mp);
472 	g_crypto_op_mp = NULL;
473 error_create_op:
474 	spdk_mempool_free(g_mbuf_mp);
475 	g_mbuf_mp = NULL;
476 error_create_mbuf:
477 	rte_mempool_free(g_session_mp);
478 	g_session_mp = NULL;
479 error_create_session_mp:
480 	if (g_session_mp_priv != NULL) {
481 		rte_mempool_free(g_session_mp_priv);
482 		g_session_mp_priv = NULL;
483 	}
484 	return rc;
485 }
486 
487 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish
488  * the read on decrypted data. Do that here.
489  */
490 static void
491 _crypto_operation_complete(struct spdk_bdev_io *bdev_io)
492 {
493 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
494 					   crypto_bdev);
495 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
496 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
497 	struct spdk_bdev_io *free_me = io_ctx->read_io;
498 	int rc = 0;
499 
500 	TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link);
501 
502 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
503 
504 		/* Complete the original IO and then free the one that we created
505 		 * as a result of issuing an IO via submit_request.
506 		 */
507 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
508 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
509 		} else {
510 			SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io);
511 			rc = -EINVAL;
512 		}
513 		spdk_bdev_free_io(free_me);
514 
515 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
516 
517 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
518 			/* Write the encrypted data. */
519 			rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
520 						     &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks,
521 						     io_ctx->aux_num_blocks, _complete_internal_write,
522 						     bdev_io);
523 		} else {
524 			SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io);
525 			rc = -EINVAL;
526 		}
527 
528 	} else {
529 		SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n",
530 			    bdev_io->type);
531 		rc = -EINVAL;
532 	}
533 
534 	if (rc) {
535 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
536 	}
537 }
538 
/* Prototype; the definition follows crypto_dev_poller(). */
static int _crypto_operation(struct spdk_bdev_io *bdev_io,
			     enum rte_crypto_cipher_operation crypto_op, void *aux_buf);
542 
543 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
544  * the device. Then we need to decide if what we've got so far (including previous poller
545  * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
546  * accordingly. This means either completing a read or issuing a new write.
547  */
548 static int
549 crypto_dev_poller(void *args)
550 {
551 	struct crypto_io_channel *crypto_ch = args;
552 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
553 	int i, num_dequeued_ops, num_enqueued_ops;
554 	struct spdk_bdev_io *bdev_io = NULL;
555 	struct crypto_bdev_io *io_ctx = NULL;
556 	struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
557 	struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
558 	int num_mbufs = 0;
559 	struct vbdev_crypto_op *op_to_resubmit;
560 
561 	/* Each run of the poller will get just what the device has available
562 	 * at the moment we call it, we don't check again after draining the
563 	 * first batch.
564 	 */
565 	num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
566 			   dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
567 
568 	/* Check if operation was processed successfully */
569 	for (i = 0; i < num_dequeued_ops; i++) {
570 
571 		/* We don't know the order or association of the crypto ops wrt any
572 		 * partiular bdev_io so need to look at each and determine if it's
573 		 * the last one for it's bdev_io or not.
574 		 */
575 		bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata;
576 		assert(bdev_io != NULL);
577 		io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
578 
579 		if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
580 			SPDK_ERRLOG("error with op %d status %u\n", i,
581 				    dequeued_ops[i]->status);
582 			/* Update the bdev status to error, we'll still process the
583 			 * rest of the crypto ops for this bdev_io though so they
584 			 * aren't left hanging.
585 			 */
586 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
587 		}
588 
589 		assert(io_ctx->cryop_cnt_remaining > 0);
590 
591 		/* Return the associated src and dst mbufs by collecting them into
592 		 * an array that we can use the bulk API to free after the loop.
593 		 */
594 		dequeued_ops[i]->sym->m_src->userdata = NULL;
595 		mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
596 		if (dequeued_ops[i]->sym->m_dst) {
597 			mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
598 		}
599 
600 		/* done encrypting, complete the bdev_io */
601 		if (--io_ctx->cryop_cnt_remaining == 0) {
602 
603 			/* If we're completing this with an outstanding reset we need
604 			 * to fail it.
605 			 */
606 			if (crypto_ch->iter) {
607 				io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
608 			}
609 
610 			/* Complete the IO */
611 			_crypto_operation_complete(bdev_io);
612 		}
613 	}
614 
615 	/* Now bulk free both mbufs and crypto operations. */
616 	if (num_dequeued_ops > 0) {
617 		rte_mempool_put_bulk(g_crypto_op_mp,
618 				     (void **)dequeued_ops,
619 				     num_dequeued_ops);
620 		assert(num_mbufs > 0);
621 		spdk_mempool_put_bulk(g_mbuf_mp,
622 				      (void **)mbufs_to_free,
623 				      num_mbufs);
624 	}
625 
626 	/* Check if there are any pending crypto ops to process */
627 	while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) {
628 		op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops);
629 		io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx;
630 		num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id,
631 				   op_to_resubmit->qp,
632 				   &op_to_resubmit->crypto_op,
633 				   1);
634 		if (num_enqueued_ops == 1) {
635 			/* Make sure we don't put this on twice as one bdev_io is made up
636 			 * of many crypto ops.
637 			 */
638 			if (io_ctx->on_pending_list == false) {
639 				TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link);
640 				io_ctx->on_pending_list = true;
641 			}
642 			TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link);
643 		} else {
644 			/* if we couldn't get one, just break and try again later. */
645 			break;
646 		}
647 	}
648 
649 	/* If the channel iter is not NULL, we need to continue to poll
650 	 * until the pending list is empty, then we can move on to the
651 	 * next channel.
652 	 */
653 	if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) {
654 		SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch);
655 		spdk_for_each_channel_continue(crypto_ch->iter, 0);
656 		crypto_ch->iter = NULL;
657 	}
658 
659 	return num_dequeued_ops;
660 }
661 
662 /* We're either encrypting on the way down or decrypting on the way back. */
663 static int
664 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op,
665 		  void *aux_buf)
666 {
667 	uint16_t num_enqueued_ops = 0;
668 	uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
669 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
670 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
671 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
672 	uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
673 	uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
674 	int rc;
675 	uint32_t iov_index = 0;
676 	uint32_t allocated = 0;
677 	uint8_t *current_iov = NULL;
678 	uint64_t total_remaining = 0;
679 	uint64_t updated_length, current_iov_remaining = 0;
680 	uint32_t crypto_index = 0;
681 	uint32_t en_offset = 0;
682 	struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
683 	struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
684 	struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
685 	int burst;
686 	struct vbdev_crypto_op *op_to_queue;
687 	uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev);
688 
689 	assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
690 
691 	/* Get the number of source mbufs that we need. These will always be 1:1 because we
692 	 * don't support chaining. The reason we don't is because of our decision to use
693 	 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the
694 	 * op would be > 1 LBA.
695 	 */
696 	rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
697 	if (rc) {
698 		SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
699 		return -ENOMEM;
700 	}
701 
702 	/* Get the same amount but these buffers to describe the encrypted data location (dst). */
703 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
704 		rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
705 		if (rc) {
706 			SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
707 			rc = -ENOMEM;
708 			goto error_get_dst;
709 		}
710 	}
711 
712 #ifdef __clang_analyzer__
713 	/* silence scan-build false positive */
714 	SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000);
715 #endif
716 	/* Allocate crypto operations. */
717 	allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
718 					     RTE_CRYPTO_OP_TYPE_SYMMETRIC,
719 					     crypto_ops, cryop_cnt);
720 	if (allocated < cryop_cnt) {
721 		SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
722 		rc = -ENOMEM;
723 		goto error_get_ops;
724 	}
725 
726 	/* For encryption, we need to prepare a single contiguous buffer as the encryption
727 	 * destination, we'll then pass that along for the write after encryption is done.
728 	 * This is done to avoiding encrypting the provided write buffer which may be
729 	 * undesirable in some use cases.
730 	 */
731 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
732 		io_ctx->aux_buf_iov.iov_len = total_length;
733 		io_ctx->aux_buf_raw = aux_buf;
734 		io_ctx->aux_buf_iov.iov_base  = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1));
735 		io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks;
736 		io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks;
737 	}
738 
739 	/* This value is used in the completion callback to determine when the bdev_io is
740 	 * complete.
741 	 */
742 	io_ctx->cryop_cnt_remaining = cryop_cnt;
743 
744 	/* As we don't support chaining because of a decision to use LBA as IV, construction
745 	 * of crypto operations is straightforward. We build both the op, the mbuf and the
746 	 * dst_mbuf in our local arrays by looping through the length of the bdev IO and
747 	 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each
748 	 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single
749 	 * mbuf per crypto operation.
750 	 */
751 	total_remaining = total_length;
752 	current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
753 	current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
754 	do {
755 		uint8_t *iv_ptr;
756 		uint64_t op_block_offset;
757 
758 		/* Set the mbuf elements address and length. Null out the next pointer. */
759 		src_mbufs[crypto_index]->buf_addr = current_iov;
760 		src_mbufs[crypto_index]->data_len = updated_length = crypto_len;
761 		/* TODO: Make this assignment conditional on QAT usage and add an assert. */
762 		src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length);
763 		src_mbufs[crypto_index]->next = NULL;
764 		/* Store context in every mbuf as we don't know anything about completion order */
765 		src_mbufs[crypto_index]->userdata = bdev_io;
766 
767 		/* Set the IV - we use the LBA of the crypto_op */
768 		iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
769 						   IV_OFFSET);
770 		memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
771 		op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
772 		rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
773 
774 		/* Set the data to encrypt/decrypt length */
775 		crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
776 		crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
777 
778 		/* link the mbuf to the crypto op. */
779 		crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
780 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
781 			crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
782 		} else {
783 			crypto_ops[crypto_index]->sym->m_dst = NULL;
784 		}
785 
786 		/* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io
787 		 * that will be used to process the write on completion to the same buffer. Setting
788 		 * up the en_buffer is a little simpler as we know the destination buffer is single IOV.
789 		 */
790 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
791 
792 			/* Set the relevant destination en_mbuf elements. */
793 			dst_mbufs[crypto_index]->buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset;
794 			dst_mbufs[crypto_index]->data_len = updated_length = crypto_len;
795 			/* TODO: Make this assignment conditional on QAT usage and add an assert. */
796 			dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr,
797 							    &updated_length);
798 			crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
799 			en_offset += crypto_len;
800 			dst_mbufs[crypto_index]->next = NULL;
801 
802 			/* Attach the crypto session to the operation */
803 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
804 							      io_ctx->crypto_bdev->session_encrypt);
805 			if (rc) {
806 				rc = -EINVAL;
807 				goto error_attach_session;
808 			}
809 
810 		} else {
811 			/* Attach the crypto session to the operation */
812 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
813 							      io_ctx->crypto_bdev->session_decrypt);
814 			if (rc) {
815 				rc = -EINVAL;
816 				goto error_attach_session;
817 			}
818 
819 
820 		}
821 
822 		/* Subtract our running totals for the op in progress and the overall bdev io */
823 		total_remaining -= crypto_len;
824 		current_iov_remaining -= crypto_len;
825 
826 		/* move our current IOV pointer accordingly. */
827 		current_iov += crypto_len;
828 
829 		/* move on to the next crypto operation */
830 		crypto_index++;
831 
832 		/* If we're done with this IOV, move to the next one. */
833 		if (current_iov_remaining == 0 && total_remaining > 0) {
834 			iov_index++;
835 			current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
836 			current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
837 		}
838 	} while (total_remaining > 0);
839 
840 	/* Enqueue everything we've got but limit by the max number of descriptors we
841 	 * configured the crypto device for.
842 	 */
843 	burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS);
844 	num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
845 			   &crypto_ops[0],
846 			   burst);
847 
848 	/* Add this bdev_io to our outstanding list if any of its crypto ops made it. */
849 	if (num_enqueued_ops > 0) {
850 		TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link);
851 		io_ctx->on_pending_list = true;
852 	}
853 	/* We were unable to enqueue everything but did get some, so need to decide what
854 	 * to do based on the status of the last op.
855 	 */
856 	if (num_enqueued_ops < cryop_cnt) {
857 		switch (crypto_ops[num_enqueued_ops]->status) {
858 		case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED:
859 			/* Queue them up on a linked list to be resubmitted via the poller. */
860 			for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) {
861 				op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index],
862 						uint8_t *, QUEUED_OP_OFFSET);
863 				op_to_queue->cdev_id = cdev_id;
864 				op_to_queue->qp = crypto_ch->device_qp->qp;
865 				op_to_queue->crypto_op = crypto_ops[crypto_index];
866 				op_to_queue->bdev_io = bdev_io;
867 				TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops,
868 						  op_to_queue,
869 						  link);
870 			}
871 			break;
872 		default:
873 			/* For all other statuses, set the io_ctx bdev_io status so that
874 			 * the poller will pick the failure up for the overall bdev status.
875 			 */
876 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
877 			if (num_enqueued_ops == 0) {
878 				/* If nothing was enqueued, but the last one wasn't because of
879 				 * busy, fail it now as the poller won't know anything about it.
880 				 */
881 				_crypto_operation_complete(bdev_io);
882 				rc = -EINVAL;
883 				goto error_attach_session;
884 			}
885 			break;
886 		}
887 	}
888 
889 	return rc;
890 
891 	/* Error cleanup paths. */
892 error_attach_session:
893 error_get_ops:
894 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
895 		spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
896 				      cryop_cnt);
897 	}
898 	if (allocated > 0) {
899 		rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
900 				     allocated);
901 	}
902 error_get_dst:
903 	spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
904 			      cryop_cnt);
905 	return rc;
906 }
907 
908 /* This function is called after all channels have been quiesced following
909  * a bdev reset.
910  */
911 static void
912 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status)
913 {
914 	struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i);
915 
916 	assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios));
917 	assert(io_ctx->orig_io != NULL);
918 
919 	spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
920 }
921 
922 /* This function is called per channel to quiesce IOs before completing a
923  * bdev reset that we received.
924  */
925 static void
926 _ch_quiesce(struct spdk_io_channel_iter *i)
927 {
928 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
929 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
930 
931 	crypto_ch->iter = i;
932 	/* When the poller runs, it will see the non-NULL iter and handle
933 	 * the quiesce.
934 	 */
935 }
936 
937 /* Completion callback for IO that were issued from this bdev other than read/write.
938  * They have their own for readability.
939  */
940 static void
941 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
942 {
943 	struct spdk_bdev_io *orig_io = cb_arg;
944 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
945 
946 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
947 		struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
948 
949 		assert(orig_io == orig_ctx->orig_io);
950 
951 		spdk_bdev_free_io(bdev_io);
952 
953 		spdk_for_each_channel(orig_ctx->crypto_bdev,
954 				      _ch_quiesce,
955 				      orig_ctx,
956 				      _ch_quiesce_done);
957 		return;
958 	}
959 
960 	spdk_bdev_io_complete(orig_io, status);
961 	spdk_bdev_free_io(bdev_io);
962 }
963 
964 /* Completion callback for writes that were issued from this bdev. */
965 static void
966 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
967 {
968 	struct spdk_bdev_io *orig_io = cb_arg;
969 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
970 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
971 
972 	spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw);
973 
974 	spdk_bdev_io_complete(orig_io, status);
975 	spdk_bdev_free_io(bdev_io);
976 }
977 
978 /* Completion callback for reads that were issued from this bdev. */
979 static void
980 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
981 {
982 	struct spdk_bdev_io *orig_io = cb_arg;
983 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
984 
985 	if (success) {
986 
987 		/* Save off this bdev_io so it can be freed after decryption. */
988 		orig_ctx->read_io = bdev_io;
989 
990 		if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) {
991 			return;
992 		} else {
993 			SPDK_ERRLOG("ERROR decrypting\n");
994 		}
995 	} else {
996 		SPDK_ERRLOG("ERROR on read prior to decrypting\n");
997 	}
998 
999 	spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
1000 	spdk_bdev_free_io(bdev_io);
1001 }
1002 
1003 static void
1004 vbdev_crypto_resubmit_io(void *arg)
1005 {
1006 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
1007 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1008 
1009 	vbdev_crypto_submit_request(io_ctx->ch, bdev_io);
1010 }
1011 
1012 static void
1013 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io)
1014 {
1015 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1016 	int rc;
1017 
1018 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
1019 	io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io;
1020 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
1021 
1022 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->crypto_ch->base_ch, &io_ctx->bdev_io_wait);
1023 	if (rc != 0) {
1024 		SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc);
1025 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1026 	}
1027 }
1028 
1029 /* Callback for getting a buf from the bdev pool in the event that the caller passed
1030  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
1031  * beneath us before we're done with it.
1032  */
1033 static void
1034 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1035 		       bool success)
1036 {
1037 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1038 					   crypto_bdev);
1039 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1040 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1041 	int rc;
1042 
1043 	if (!success) {
1044 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1045 		return;
1046 	}
1047 
1048 	rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
1049 				    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
1050 				    bdev_io->u.bdev.num_blocks, _complete_internal_read,
1051 				    bdev_io);
1052 	if (rc != 0) {
1053 		if (rc == -ENOMEM) {
1054 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1055 			io_ctx->ch = ch;
1056 			vbdev_crypto_queue_io(bdev_io);
1057 		} else {
1058 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1059 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1060 		}
1061 	}
1062 }
1063 
1064 /* For encryption we don't want to encrypt the data in place as the host isn't
1065  * expecting us to mangle its data buffers so we need to encrypt into the bdev
1066  * aux buffer, then we can use that as the source for the disk data transfer.
1067  */
1068 static void
1069 crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1070 			void *aux_buf)
1071 {
1072 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1073 	int rc = 0;
1074 
1075 	rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf);
1076 	if (rc != 0) {
1077 		spdk_bdev_io_put_aux_buf(bdev_io, aux_buf);
1078 		if (rc == -ENOMEM) {
1079 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1080 			io_ctx->ch = ch;
1081 			vbdev_crypto_queue_io(bdev_io);
1082 		} else {
1083 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1084 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1085 		}
1086 	}
1087 }
1088 
1089 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto,
1090  * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO
1091  * and call our cpl callback provided below along with the original bdev_io so that we can
1092  * complete it once this IO completes. For crypto operations, we'll either encrypt it first
1093  * (writes) then call back into bdev to submit it or we'll submit a read and then catch it
1094  * on the way back for decryption.
1095  */
1096 static void
1097 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
1098 {
1099 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1100 					   crypto_bdev);
1101 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1102 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1103 	int rc = 0;
1104 
1105 	memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
1106 	io_ctx->crypto_bdev = crypto_bdev;
1107 	io_ctx->crypto_ch = crypto_ch;
1108 	io_ctx->orig_io = bdev_io;
1109 	io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1110 
1111 	switch (bdev_io->type) {
1112 	case SPDK_BDEV_IO_TYPE_READ:
1113 		spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
1114 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
1115 		break;
1116 	case SPDK_BDEV_IO_TYPE_WRITE:
1117 		/* Tell the bdev layer that we need an aux buf in addition to the data
1118 		 * buf already associated with the bdev.
1119 		 */
1120 		spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb);
1121 		break;
1122 	case SPDK_BDEV_IO_TYPE_UNMAP:
1123 		rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1124 					    bdev_io->u.bdev.offset_blocks,
1125 					    bdev_io->u.bdev.num_blocks,
1126 					    _complete_internal_io, bdev_io);
1127 		break;
1128 	case SPDK_BDEV_IO_TYPE_FLUSH:
1129 		rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1130 					    bdev_io->u.bdev.offset_blocks,
1131 					    bdev_io->u.bdev.num_blocks,
1132 					    _complete_internal_io, bdev_io);
1133 		break;
1134 	case SPDK_BDEV_IO_TYPE_RESET:
1135 		rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
1136 				     _complete_internal_io, bdev_io);
1137 		break;
1138 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1139 	default:
1140 		SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
1141 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1142 		return;
1143 	}
1144 
1145 	if (rc != 0) {
1146 		if (rc == -ENOMEM) {
1147 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1148 			io_ctx->ch = ch;
1149 			vbdev_crypto_queue_io(bdev_io);
1150 		} else {
1151 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1152 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1153 		}
1154 	}
1155 }
1156 
1157 /* We'll just call the base bdev and let it answer except for WZ command which
1158  * we always say we don't support so that the bdev layer will actually send us
1159  * real writes that we can encrypt.
1160  */
1161 static bool
1162 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1163 {
1164 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1165 
1166 	switch (io_type) {
1167 	case SPDK_BDEV_IO_TYPE_WRITE:
1168 	case SPDK_BDEV_IO_TYPE_UNMAP:
1169 	case SPDK_BDEV_IO_TYPE_RESET:
1170 	case SPDK_BDEV_IO_TYPE_READ:
1171 	case SPDK_BDEV_IO_TYPE_FLUSH:
1172 		return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
1173 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1174 	/* Force the bdev layer to issue actual writes of zeroes so we can
1175 	 * encrypt them as regular writes.
1176 	 */
1177 	default:
1178 		return false;
1179 	}
1180 }
1181 
1182 /* Callback for unregistering the IO device. */
1183 static void
1184 _device_unregister_cb(void *io_device)
1185 {
1186 	struct vbdev_crypto *crypto_bdev = io_device;
1187 
1188 	/* Done with this crypto_bdev. */
1189 	rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt);
1190 	rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt);
1191 	free(crypto_bdev->drv_name);
1192 	if (crypto_bdev->key) {
1193 		memset(crypto_bdev->key, 0, strnlen(crypto_bdev->key, (AES_CBC_KEY_LENGTH + 1)));
1194 		free(crypto_bdev->key);
1195 	}
1196 	if (crypto_bdev->key2) {
1197 		memset(crypto_bdev->key2, 0, strnlen(crypto_bdev->key2, (AES_XTS_KEY_LENGTH + 1)));
1198 		free(crypto_bdev->key2);
1199 	}
1200 	if (crypto_bdev->xts_key) {
1201 		memset(crypto_bdev->xts_key, 0, strnlen(crypto_bdev->xts_key, (AES_XTS_KEY_LENGTH * 2) + 1));
1202 		free(crypto_bdev->xts_key);
1203 	}
1204 	free(crypto_bdev->crypto_bdev.name);
1205 	free(crypto_bdev);
1206 }
1207 
/* Thread-message wrapper around the bdev close operation; ctx is the
 * descriptor to close.
 */
static void
_vbdev_crypto_destruct(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
1216 
1217 /* Called after we've unregistered following a hot remove callback.
1218  * Our finish entry point will be called next.
1219  */
1220 static int
1221 vbdev_crypto_destruct(void *ctx)
1222 {
1223 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1224 
1225 	/* Remove this device from the internal list */
1226 	TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
1227 
1228 	/* Unclaim the underlying bdev. */
1229 	spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
1230 
1231 	/* Close the underlying bdev on its same opened thread. */
1232 	if (crypto_bdev->thread && crypto_bdev->thread != spdk_get_thread()) {
1233 		spdk_thread_send_msg(crypto_bdev->thread, _vbdev_crypto_destruct, crypto_bdev->base_desc);
1234 	} else {
1235 		spdk_bdev_close(crypto_bdev->base_desc);
1236 	}
1237 
1238 	/* Unregister the io_device. */
1239 	spdk_io_device_unregister(crypto_bdev, _device_unregister_cb);
1240 
1241 	g_number_of_claimed_volumes--;
1242 
1243 	return 0;
1244 }
1245 
/* Entry point through which upper layers obtain a channel to this bdev. ctx is
 * the context supplied when the crypto vbdev was created, i.e. the address of
 * one of our context nodes, which is also what was registered as the
 * io_device.
 */
static struct spdk_io_channel *
vbdev_crypto_get_io_channel(void *ctx)
{
	/* The IO channel code allocates a channel made up of the SPDK channel
	 * structure plus a crypto_io_channel of the size given when the IO
	 * device was registered, then calls our channel create callback to
	 * populate whatever we need in it.
	 */
	return spdk_get_io_channel(ctx);
}
1265 
1266 /* This is the output for bdev_get_bdevs() for this vbdev */
1267 static int
1268 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1269 {
1270 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1271 
1272 	spdk_json_write_name(w, "crypto");
1273 	spdk_json_write_object_begin(w);
1274 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1275 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1276 	spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1277 	spdk_json_write_named_string(w, "key", crypto_bdev->key);
1278 	if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1279 		spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1280 	}
1281 	spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1282 	spdk_json_write_object_end(w);
1283 	return 0;
1284 }
1285 
1286 static int
1287 vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
1288 {
1289 	struct vbdev_crypto *crypto_bdev;
1290 
1291 	TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) {
1292 		spdk_json_write_object_begin(w);
1293 		spdk_json_write_named_string(w, "method", "bdev_crypto_create");
1294 		spdk_json_write_named_object_begin(w, "params");
1295 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1296 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1297 		spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1298 		spdk_json_write_named_string(w, "key", crypto_bdev->key);
1299 		if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1300 			spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1301 		}
1302 		spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1303 		spdk_json_write_object_end(w);
1304 		spdk_json_write_object_end(w);
1305 	}
1306 	return 0;
1307 }
1308 
1309 /* Helper function for the channel creation callback. */
1310 static void
1311 _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
1312 		  struct crypto_io_channel *crypto_ch)
1313 {
1314 	pthread_mutex_lock(&g_device_qp_lock);
1315 	if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
1316 		/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
1317 		 * workload out over the multiple virtual functions in the device. For the devices
1318 		 * where this isn't the case, it doesn't hurt.
1319 		 */
1320 		TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
1321 			if (device_qp->index != g_next_qat_index) {
1322 				continue;
1323 			}
1324 			if (device_qp->in_use == false) {
1325 				crypto_ch->device_qp = device_qp;
1326 				device_qp->in_use = true;
1327 				g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
1328 				break;
1329 			} else {
1330 				/* if the preferred index is used, skip to the next one in this set. */
1331 				g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
1332 			}
1333 		}
1334 	} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
1335 		TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) {
1336 			if (device_qp->in_use == false) {
1337 				crypto_ch->device_qp = device_qp;
1338 				device_qp->in_use = true;
1339 				break;
1340 			}
1341 		}
1342 	}
1343 	pthread_mutex_unlock(&g_device_qp_lock);
1344 }
1345 
1346 /* We provide this callback for the SPDK channel code to create a channel using
1347  * the channel struct we provided in our module get_io_channel() entry point. Here
1348  * we get and save off an underlying base channel of the device below us so that
1349  * we can communicate with the base bdev on a per channel basis. We also register the
1350  * poller used to complete crypto operations from the device.
1351  */
1352 static int
1353 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
1354 {
1355 	struct crypto_io_channel *crypto_ch = ctx_buf;
1356 	struct vbdev_crypto *crypto_bdev = io_device;
1357 	struct device_qp *device_qp = NULL;
1358 
1359 	crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
1360 	crypto_ch->poller = SPDK_POLLER_REGISTER(crypto_dev_poller, crypto_ch, 0);
1361 	crypto_ch->device_qp = NULL;
1362 
1363 	/* Assign a device/qp combination that is unique per channel per PMD. */
1364 	_assign_device_qp(crypto_bdev, device_qp, crypto_ch);
1365 	assert(crypto_ch->device_qp);
1366 
1367 	/* We use this queue to track outstanding IO in our layer. */
1368 	TAILQ_INIT(&crypto_ch->pending_cry_ios);
1369 
1370 	/* We use this to queue up crypto ops when the device is busy. */
1371 	TAILQ_INIT(&crypto_ch->queued_cry_ops);
1372 
1373 	return 0;
1374 }
1375 
1376 /* We provide this callback for the SPDK channel code to destroy a channel
1377  * created with our create callback. We just need to undo anything we did
1378  * when we created.
1379  */
1380 static void
1381 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
1382 {
1383 	struct crypto_io_channel *crypto_ch = ctx_buf;
1384 
1385 	pthread_mutex_lock(&g_device_qp_lock);
1386 	crypto_ch->device_qp->in_use = false;
1387 	pthread_mutex_unlock(&g_device_qp_lock);
1388 
1389 	spdk_poller_unregister(&crypto_ch->poller);
1390 	spdk_put_io_channel(crypto_ch->base_ch);
1391 }
1392 
1393 /* Create the association from the bdev and vbdev name and insert
1394  * on the global list. */
1395 static int
1396 vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
1397 			 const char *crypto_pmd, const char *key,
1398 			 const char *cipher, const char *key2)
1399 {
1400 	struct bdev_names *name;
1401 	int rc, j;
1402 	bool found = false;
1403 
1404 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1405 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
1406 			SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name);
1407 			return -EEXIST;
1408 		}
1409 	}
1410 
1411 	name = calloc(1, sizeof(struct bdev_names));
1412 	if (!name) {
1413 		SPDK_ERRLOG("could not allocate bdev_names\n");
1414 		return -ENOMEM;
1415 	}
1416 
1417 	name->bdev_name = strdup(bdev_name);
1418 	if (!name->bdev_name) {
1419 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
1420 		rc = -ENOMEM;
1421 		goto error_alloc_bname;
1422 	}
1423 
1424 	name->vbdev_name = strdup(vbdev_name);
1425 	if (!name->vbdev_name) {
1426 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
1427 		rc = -ENOMEM;
1428 		goto error_alloc_vname;
1429 	}
1430 
1431 	name->drv_name = strdup(crypto_pmd);
1432 	if (!name->drv_name) {
1433 		SPDK_ERRLOG("could not allocate name->drv_name\n");
1434 		rc = -ENOMEM;
1435 		goto error_alloc_dname;
1436 	}
1437 	for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
1438 		if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
1439 			found = true;
1440 			break;
1441 		}
1442 	}
1443 	if (!found) {
1444 		SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
1445 		rc = -EINVAL;
1446 		goto error_invalid_pmd;
1447 	}
1448 
1449 	name->key = strdup(key);
1450 	if (!name->key) {
1451 		SPDK_ERRLOG("could not allocate name->key\n");
1452 		rc = -ENOMEM;
1453 		goto error_alloc_key;
1454 	}
1455 	if (strnlen(name->key, (AES_CBC_KEY_LENGTH + 1)) != AES_CBC_KEY_LENGTH) {
1456 		SPDK_ERRLOG("invalid AES_CBC key length\n");
1457 		rc = -EINVAL;
1458 		goto error_invalid_key;
1459 	}
1460 
1461 	if (strncmp(cipher, AES_XTS, sizeof(AES_XTS)) == 0) {
1462 		/* To please scan-build, input validation makes sure we can't
1463 		 * have this cipher without providing a key2.
1464 		 */
1465 		name->cipher = AES_XTS;
1466 		assert(key2);
1467 		if (strnlen(key2, (AES_XTS_KEY_LENGTH + 1)) != AES_XTS_KEY_LENGTH) {
1468 			SPDK_ERRLOG("invalid AES_XTS key length\n");
1469 			rc = -EINVAL;
1470 			goto error_invalid_key2;
1471 		}
1472 
1473 		name->key2 = strdup(key2);
1474 		if (!name->key2) {
1475 			SPDK_ERRLOG("could not allocate name->key2\n");
1476 			rc = -ENOMEM;
1477 			goto error_alloc_key2;
1478 		}
1479 	} else if (strncmp(cipher, AES_CBC, sizeof(AES_CBC)) == 0) {
1480 		name->cipher = AES_CBC;
1481 	} else {
1482 		SPDK_ERRLOG("Invalid cipher: %s\n", cipher);
1483 		rc = -EINVAL;
1484 		goto error_cipher;
1485 	}
1486 
1487 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
1488 
1489 	return 0;
1490 
1491 	/* Error cleanup paths. */
1492 error_cipher:
1493 	free(name->key2);
1494 error_alloc_key2:
1495 error_invalid_key2:
1496 error_invalid_key:
1497 	free(name->key);
1498 error_alloc_key:
1499 error_invalid_pmd:
1500 	free(name->drv_name);
1501 error_alloc_dname:
1502 	free(name->vbdev_name);
1503 error_alloc_vname:
1504 	free(name->bdev_name);
1505 error_alloc_bname:
1506 	free(name);
1507 	return rc;
1508 }
1509 
/* RPC entry point for crypto creation. Records the name association and then
 * tries to claim the base bdev. A base bdev that does not exist yet (-ENODEV
 * from the claim) is not an error: 0 is returned and creation is deferred.
 * Any other failure code is returned as is.
 */
int
create_crypto_disk(const char *bdev_name, const char *vbdev_name,
		   const char *crypto_pmd, const char *key,
		   const char *cipher, const char *key2)
{
	int rc;

	rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key, cipher, key2);
	if (rc != 0) {
		return rc;
	}

	rc = vbdev_crypto_claim(bdev_name);
	if (rc != -ENODEV) {
		return rc;
	}

	SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
	return 0;
}
1531 
/* Module init entry point, called at driver init time. Fully initializes the
 * crypto drivers (both SW and HW) and returns the result of that setup.
 */
static int
vbdev_crypto_init(void)
{
	int rc;

	rc = vbdev_crypto_init_crypto_drivers();
	if (rc != 0) {
		SPDK_ERRLOG("Error setting up crypto devices\n");
	}

	return rc;
}
1548 
1549 /* Called when the entire module is being torn down. */
1550 static void
1551 vbdev_crypto_finish(void)
1552 {
1553 	struct bdev_names *name;
1554 	struct vbdev_dev *device;
1555 	struct device_qp *dev_qp;
1556 	unsigned i;
1557 	int rc;
1558 
1559 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
1560 		TAILQ_REMOVE(&g_bdev_names, name, link);
1561 		free(name->drv_name);
1562 		free(name->key);
1563 		free(name->bdev_name);
1564 		free(name->vbdev_name);
1565 		free(name->key2);
1566 		free(name);
1567 	}
1568 
1569 	while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
1570 		struct rte_cryptodev *rte_dev;
1571 
1572 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
1573 		rte_cryptodev_stop(device->cdev_id);
1574 
1575 		assert(device->cdev_id < RTE_CRYPTO_MAX_DEVS);
1576 		rte_dev = &rte_cryptodevs[device->cdev_id];
1577 
1578 		if (rte_dev->dev_ops->queue_pair_release != NULL) {
1579 			for (i = 0; i < device->cdev_info.max_nb_queue_pairs; i++) {
1580 				rte_dev->dev_ops->queue_pair_release(rte_dev, i);
1581 			}
1582 		}
1583 		free(device);
1584 	}
1585 	rc = rte_vdev_uninit(AESNI_MB);
1586 	if (rc) {
1587 		SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc);
1588 	}
1589 
1590 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_qat))) {
1591 		TAILQ_REMOVE(&g_device_qp_qat, dev_qp, link);
1592 		free(dev_qp);
1593 	}
1594 
1595 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_aesni_mb))) {
1596 		TAILQ_REMOVE(&g_device_qp_aesni_mb, dev_qp, link);
1597 		free(dev_qp);
1598 	}
1599 
1600 	rte_mempool_free(g_crypto_op_mp);
1601 	spdk_mempool_free(g_mbuf_mp);
1602 	rte_mempool_free(g_session_mp);
1603 	if (g_session_mp_priv != NULL) {
1604 		rte_mempool_free(g_session_mp_priv);
1605 	}
1606 }
1607 
1608 /* During init we'll be asked how much memory we'd like passed to us
1609  * in bev_io structures as context. Here's where we specify how
1610  * much context we want per IO.
1611  */
1612 static int
1613 vbdev_crypto_get_ctx_size(void)
1614 {
1615 	return sizeof(struct crypto_bdev_io);
1616 }
1617 
1618 static void
1619 vbdev_crypto_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
1620 {
1621 	struct vbdev_crypto *crypto_bdev, *tmp;
1622 
1623 	TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
1624 		if (bdev_find == crypto_bdev->base_bdev) {
1625 			spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
1626 		}
1627 	}
1628 }
1629 
1630 /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */
1631 static void
1632 vbdev_crypto_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1633 				void *event_ctx)
1634 {
1635 	switch (type) {
1636 	case SPDK_BDEV_EVENT_REMOVE:
1637 		vbdev_crypto_base_bdev_hotremove_cb(bdev);
1638 		break;
1639 	default:
1640 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1641 		break;
1642 	}
1643 }
1644 
/* Per-bdev config writer. Intentionally does nothing: no config per bdev is
 * needed.
 */
static void
vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	(void)bdev;
	(void)w;
}
1650 
1651 /* When we register our bdev this is how we specify our entry points. */
1652 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
1653 	.destruct		= vbdev_crypto_destruct,
1654 	.submit_request		= vbdev_crypto_submit_request,
1655 	.io_type_supported	= vbdev_crypto_io_type_supported,
1656 	.get_io_channel		= vbdev_crypto_get_io_channel,
1657 	.dump_info_json		= vbdev_crypto_dump_info_json,
1658 	.write_config_json	= vbdev_crypto_write_config_json
1659 };
1660 
1661 static struct spdk_bdev_module crypto_if = {
1662 	.name = "crypto",
1663 	.module_init = vbdev_crypto_init,
1664 	.get_ctx_size = vbdev_crypto_get_ctx_size,
1665 	.examine_config = vbdev_crypto_examine,
1666 	.module_fini = vbdev_crypto_finish,
1667 	.config_json = vbdev_crypto_config_json
1668 };
1669 
1670 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
1671 
1672 static int
1673 vbdev_crypto_claim(const char *bdev_name)
1674 {
1675 	struct bdev_names *name;
1676 	struct vbdev_crypto *vbdev;
1677 	struct vbdev_dev *device;
1678 	struct spdk_bdev *bdev;
1679 	bool found = false;
1680 	int rc = 0;
1681 
1682 	if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
1683 		SPDK_DEBUGLOG(vbdev_crypto, "Reached max number of claimed volumes\n");
1684 		rc = -EINVAL;
1685 		goto error_vbdev_alloc;
1686 	}
1687 	g_number_of_claimed_volumes++;
1688 
1689 	/* Check our list of names from config versus this bdev and if
1690 	 * there's a match, create the crypto_bdev & bdev accordingly.
1691 	 */
1692 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1693 		if (strcmp(name->bdev_name, bdev_name) != 0) {
1694 			continue;
1695 		}
1696 		SPDK_DEBUGLOG(vbdev_crypto, "Match on %s\n", bdev_name);
1697 
1698 		vbdev = calloc(1, sizeof(struct vbdev_crypto));
1699 		if (!vbdev) {
1700 			SPDK_ERRLOG("could not allocate crypto_bdev\n");
1701 			rc = -ENOMEM;
1702 			goto error_vbdev_alloc;
1703 		}
1704 
1705 		vbdev->crypto_bdev.name = strdup(name->vbdev_name);
1706 		if (!vbdev->crypto_bdev.name) {
1707 			SPDK_ERRLOG("could not allocate crypto_bdev name\n");
1708 			rc = -ENOMEM;
1709 			goto error_bdev_name;
1710 		}
1711 
1712 		vbdev->key = strdup(name->key);
1713 		if (!vbdev->key) {
1714 			SPDK_ERRLOG("could not allocate crypto_bdev key\n");
1715 			rc = -ENOMEM;
1716 			goto error_alloc_key;
1717 		}
1718 
1719 		if (name->key2) {
1720 			vbdev->key2 = strdup(name->key2);
1721 			if (!vbdev->key2) {
1722 				SPDK_ERRLOG("could not allocate crypto_bdev key2\n");
1723 				rc = -ENOMEM;
1724 				goto error_alloc_key2;
1725 			}
1726 		}
1727 
1728 		vbdev->drv_name = strdup(name->drv_name);
1729 		if (!vbdev->drv_name) {
1730 			SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
1731 			rc = -ENOMEM;
1732 			goto error_drv_name;
1733 		}
1734 
1735 		vbdev->crypto_bdev.product_name = "crypto";
1736 
1737 		rc = spdk_bdev_open_ext(bdev_name, true, vbdev_crypto_base_bdev_event_cb,
1738 					NULL, &vbdev->base_desc);
1739 		if (rc) {
1740 			if (rc != -ENODEV) {
1741 				SPDK_ERRLOG("could not open bdev %s\n", bdev_name);
1742 			}
1743 			goto error_open;
1744 		}
1745 
1746 		bdev = spdk_bdev_desc_get_bdev(vbdev->base_desc);
1747 		vbdev->base_bdev = bdev;
1748 
1749 		vbdev->crypto_bdev.write_cache = bdev->write_cache;
1750 		vbdev->cipher = AES_CBC;
1751 		if (strcmp(vbdev->drv_name, QAT) == 0) {
1752 			vbdev->crypto_bdev.required_alignment =
1753 				spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
1754 			SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
1755 				       vbdev->crypto_bdev.required_alignment);
1756 			if (strcmp(name->cipher, AES_CBC) == 0) {
1757 				SPDK_NOTICELOG("QAT using cipher: AES_CBC\n");
1758 			} else {
1759 				SPDK_NOTICELOG("QAT using cipher: AES_XTS\n");
1760 				vbdev->cipher = AES_XTS;
1761 				/* DPDK expects they keys to be concatenated together. */
1762 				vbdev->xts_key = calloc(1, (AES_XTS_KEY_LENGTH * 2) + 1);
1763 				if (vbdev->xts_key == NULL) {
1764 					SPDK_ERRLOG("could not allocate memory for XTS key\n");
1765 					rc = -ENOMEM;
1766 					goto error_xts_key;
1767 				}
1768 				memcpy(vbdev->xts_key, vbdev->key, AES_XTS_KEY_LENGTH);
1769 				assert(name->key2);
1770 				memcpy(vbdev->xts_key + AES_XTS_KEY_LENGTH, name->key2, AES_XTS_KEY_LENGTH + 1);
1771 			}
1772 		} else {
1773 			vbdev->crypto_bdev.required_alignment = bdev->required_alignment;
1774 		}
1775 		/* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
1776 		 * in units of blocks.
1777 		 */
1778 		if (bdev->optimal_io_boundary > 0) {
1779 			vbdev->crypto_bdev.optimal_io_boundary =
1780 				spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
1781 		} else {
1782 			vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
1783 		}
1784 		vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
1785 		vbdev->crypto_bdev.blocklen = bdev->blocklen;
1786 		vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
1787 
1788 		/* This is the context that is passed to us when the bdev
1789 		 * layer calls in so we'll save our crypto_bdev node here.
1790 		 */
1791 		vbdev->crypto_bdev.ctxt = vbdev;
1792 		vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
1793 		vbdev->crypto_bdev.module = &crypto_if;
1794 		TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
1795 
1796 		spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
1797 					sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
1798 
1799 		/* Save the thread where the base device is opened */
1800 		vbdev->thread = spdk_get_thread();
1801 
1802 		rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
1803 		if (rc) {
1804 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
1805 			goto error_claim;
1806 		}
1807 
1808 		/* To init the session we have to get the cryptoDev device ID for this vbdev */
1809 		TAILQ_FOREACH(device, &g_vbdev_devs, link) {
1810 			if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) {
1811 				found = true;
1812 				break;
1813 			}
1814 		}
1815 		if (found == false) {
1816 			SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n");
1817 			rc = -EINVAL;
1818 			goto error_cant_find_devid;
1819 		}
1820 
1821 		/* Get sessions. */
1822 		vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp);
1823 		if (NULL == vbdev->session_encrypt) {
1824 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1825 			rc = -EINVAL;
1826 			goto error_session_en_create;
1827 		}
1828 
1829 		vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp);
1830 		if (NULL == vbdev->session_decrypt) {
1831 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1832 			rc = -EINVAL;
1833 			goto error_session_de_create;
1834 		}
1835 
1836 		/* Init our per vbdev xform with the desired cipher options. */
1837 		vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
1838 		vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
1839 		if (strcmp(name->cipher, AES_CBC) == 0) {
1840 			vbdev->cipher_xform.cipher.key.data = vbdev->key;
1841 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
1842 			vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
1843 		} else {
1844 			vbdev->cipher_xform.cipher.key.data = vbdev->xts_key;
1845 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_XTS;
1846 			vbdev->cipher_xform.cipher.key.length = AES_XTS_KEY_LENGTH * 2;
1847 		}
1848 		vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
1849 
1850 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
1851 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
1852 						    &vbdev->cipher_xform,
1853 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1854 		if (rc < 0) {
1855 			SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
1856 			rc = -EINVAL;
1857 			goto error_session_init;
1858 		}
1859 
1860 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
1861 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
1862 						    &vbdev->cipher_xform,
1863 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1864 		if (rc < 0) {
1865 			SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
1866 			rc = -EINVAL;
1867 			goto error_session_init;
1868 		}
1869 
1870 		rc = spdk_bdev_register(&vbdev->crypto_bdev);
1871 		if (rc < 0) {
1872 			SPDK_ERRLOG("ERROR trying to register bdev\n");
1873 			rc = -EINVAL;
1874 			goto error_bdev_register;
1875 		}
1876 		SPDK_DEBUGLOG(vbdev_crypto, "registered io_device and virtual bdev for: %s\n",
1877 			      name->vbdev_name);
1878 		break;
1879 	}
1880 
1881 	return rc;
1882 
1883 	/* Error cleanup paths. */
1884 error_bdev_register:
1885 error_session_init:
1886 	rte_cryptodev_sym_session_free(vbdev->session_decrypt);
1887 error_session_de_create:
1888 	rte_cryptodev_sym_session_free(vbdev->session_encrypt);
1889 error_session_en_create:
1890 error_cant_find_devid:
1891 error_claim:
1892 	spdk_bdev_close(vbdev->base_desc);
1893 	TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
1894 	spdk_io_device_unregister(vbdev, NULL);
1895 	free(vbdev->xts_key);
1896 error_xts_key:
1897 error_open:
1898 	free(vbdev->drv_name);
1899 error_drv_name:
1900 	free(vbdev->key2);
1901 error_alloc_key2:
1902 	free(vbdev->key);
1903 error_alloc_key:
1904 	free(vbdev->crypto_bdev.name);
1905 error_bdev_name:
1906 	free(vbdev);
1907 error_vbdev_alloc:
1908 	g_number_of_claimed_volumes--;
1909 	return rc;
1910 }
1911 
1912 /* RPC entry for deleting a crypto vbdev. */
1913 void
1914 delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
1915 		   void *cb_arg)
1916 {
1917 	struct bdev_names *name;
1918 
1919 	if (!bdev || bdev->module != &crypto_if) {
1920 		cb_fn(cb_arg, -ENODEV);
1921 		return;
1922 	}
1923 
1924 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
1925 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
1926 	 * unless the underlying bdev was hot-removed.
1927 	 */
1928 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1929 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
1930 			TAILQ_REMOVE(&g_bdev_names, name, link);
1931 			free(name->bdev_name);
1932 			free(name->vbdev_name);
1933 			free(name->drv_name);
1934 			free(name->key);
1935 			free(name->key2);
1936 			free(name);
1937 			break;
1938 		}
1939 	}
1940 
1941 	/* Additional cleanup happens in the destruct callback. */
1942 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
1943 }
1944 
1945 /* Because we specified this function in our crypto bdev function table when we
1946  * registered our crypto bdev, we'll get this call anytime a new bdev shows up.
1947  * Here we need to decide if we care about it and if so what to do. We
1948  * parsed the config file at init so we check the new bdev against the list
1949  * we built up at that time and if the user configured us to attach to this
1950  * bdev, here's where we do it.
1951  */
1952 static void
1953 vbdev_crypto_examine(struct spdk_bdev *bdev)
1954 {
1955 	vbdev_crypto_claim(spdk_bdev_get_name(bdev));
1956 	spdk_bdev_module_examine_done(&crypto_if);
1957 }
1958 
1959 SPDK_LOG_REGISTER_COMPONENT(vbdev_crypto)
1960