xref: /spdk/module/bdev/crypto/vbdev_crypto.c (revision c39647df83e4be9bcc49025132c48bf2414ef8b1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vbdev_crypto.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/endian.h"
38 #include "spdk/thread.h"
39 #include "spdk/bdev_module.h"
40 #include "spdk/log.h"
41 
42 #include <rte_config.h>
43 #include <rte_bus_vdev.h>
44 #include <rte_crypto.h>
45 #include <rte_cryptodev.h>
46 #include <rte_mbuf_dyn.h>
47 
/* Used to store IO context in mbuf.
 *
 * Describes a dynamic mbuf field that is wide enough for one 64-bit value. The
 * code in this file stores the owning spdk_bdev_io pointer there for every source
 * mbuf so that the completion poller can map a dequeued crypto op back to its
 * bdev_io.
 */
static const struct rte_mbuf_dynfield rte_mbuf_dynfield_io_context = {
	.name = "context_bdev_io",
	.size = sizeof(uint64_t),
	.align = __alignof__(uint64_t),
	.flags = 0,
};
/* Offset handed back by rte_mbuf_dynfield_register() for the field above. The
 * init code treats a negative value as a registration failure.
 */
static int g_mbuf_offset;
56 
/* To add support for new device types, follow the examples of the following...
 * Note that the string names are defined by the DPDK PMD in question so be
 * sure to use the exact names.
 */
#define MAX_NUM_DRV_TYPES 2

/* The VF spread is the number of queue pairs between virtual functions, we use this to
 * load balance the QAT device.
 */
#define QAT_VF_SPREAD 32
/* Running count of QAT queue pairs; bumped once per device_qp node that
 * create_vbdev_dev() appends to the QAT list.
 */
static uint8_t g_qat_total_qp = 0;
/* NOTE(review): not referenced in the visible part of this file; presumably the
 * next QAT device_qp index to hand out when a channel is created - confirm.
 */
static uint8_t g_next_qat_index;

/* Driver names of the supported PMDs. AESNI_MB and QAT are not defined in this
 * file (presumably they come from vbdev_crypto.h).
 */
const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
71 
/* Global list of available crypto devices.
 *
 * One node is allocated per cryptodev that create_vbdev_dev() brings up
 * successfully and it is appended to g_vbdev_devs.
 */
struct vbdev_dev {
	struct rte_cryptodev_info	cdev_info;	/* filled by rte_cryptodev_info_get(); includes device friendly name */
	uint8_t				cdev_id;	/* DPDK identifier for the device */
	TAILQ_ENTRY(vbdev_dev)		link;		/* linkage on g_vbdev_devs */
};
static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
79 
/* Global lists and lock for unique device/queue pair combos. We keep one list per
 * supported PMD so that we can optimize per PMD where it makes sense. For example,
 * with QAT there is an optimal pattern for assigning queue pairs whereas with AESNI
 * there is not.
 */
struct device_qp {
	struct vbdev_dev		*device;	/* ptr to crypto device */
	uint8_t				qp;		/* queue pair for this node */
	bool				in_use;		/* whether this node is in use or not */
	uint8_t				index;		/* used by QAT to load balance placement of qpairs */
	TAILQ_ENTRY(device_qp)		link;		/* linkage on one of the per-PMD lists below */
};
/* One list per PMD; nodes are appended by create_vbdev_dev(). */
static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb);
/* NOTE(review): not taken anywhere in the visible part of this file; presumably
 * guards the two lists above when channels claim/release queue pairs - confirm.
 */
static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
94 
95 
/* In order to limit the number of resources we need to do one crypto
 * operation per LBA (we use LBA as IV), we tell the bdev layer that
 * our max IO size is something reasonable. Units here are in bytes.
 */
#define CRYPTO_MAX_IO		(64 * 1024)

/* This controls how many ops will be dequeued from the crypto driver in one run
 * of the poller. It is mainly a performance knob as it effectively determines how
 * much work the poller has to do.  However even that can vary between crypto drivers
 * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the
 * QAT driver just dequeues what has been completed already.
 */
#define MAX_DEQUEUE_BURST_SIZE	64

/* When enqueueing, we need to supply the crypto driver with an array of pointers to
 * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO
 * value in conjunction with the other defines to make sure we're not using crazy amounts
 * of memory. All of these numbers can and probably should be adjusted based on the
 * workload. By default we'll use the worst case (smallest) block size for the
 * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B
 * blocks would give us an enqueue array size of 128.
 */
#define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)

/* The number of MBUFS we need must be a power of two and to support other small IOs
 * in addition to the limits mentioned above, we go to the next power of two. It is
 * a big number because it is one mempool for source and destination mbufs. It may
 * need to be bigger to support multiple crypto drivers at once.
 *
 * NUM_MBUFS also sizes the crypto op pool and POOL_CACHE_SIZE is that pool's cache
 * size. NUM_SESSIONS sizes both session pools; the factor of two presumably covers
 * one encrypt plus one decrypt session per crypto volume.
 */
#define NUM_MBUFS		32768
#define POOL_CACHE_SIZE		256
#define MAX_CRYPTO_VOLUMES	128
#define NUM_SESSIONS		(2 * MAX_CRYPTO_VOLUMES)
#define SESS_MEMPOOL_CACHE_SIZE 0
/* NOTE(review): not referenced in the visible part of this file; presumably counts
 * claimed volumes against MAX_CRYPTO_VOLUMES - confirm.
 */
uint8_t g_number_of_claimed_volumes = 0;

/* This is the max number of IOs we can supply to any crypto device QP at one time.
 * It can vary between drivers.
 */
#define CRYPTO_QP_DESCRIPTORS	2048

/* Specific to AES_CBC. AESNI_MB_NUM_QP is passed to the AESNI_MB virtual PMD as its
 * max_nb_queue_pairs argument when the PMD is created.
 */
#define AES_CBC_IV_LENGTH	16
#define AES_CBC_KEY_LENGTH	16
#define AES_XTS_KEY_LENGTH	16	/* XTS uses 2 keys, each of this size. */
#define AESNI_MB_NUM_QP		64

/* Common for supported devices. The per-op private area of the crypto op pool holds
 * the IV first (at IV_OFFSET from the start of the op) and, right after it, the
 * struct used to queue an op for resubmission (at QUEUED_OP_OFFSET).
 */
#define IV_OFFSET            (sizeof(struct rte_crypto_op) + \
				sizeof(struct rte_crypto_sym_op))
#define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH)
147 
/* Forward declarations of static functions that are referenced before they are
 * defined. The three _complete_internal_*() functions share the
 * (bdev_io, success, cb_arg) callback signature; _complete_internal_write is the
 * one passed to spdk_bdev_writev_blocks() when encrypted data is written out.
 */
static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static void vbdev_crypto_examine(struct spdk_bdev *bdev);
static int vbdev_crypto_claim(const char *bdev_name);
static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
154 
/* List of crypto_bdev names and their base bdevs via configuration file.
 * Each node describes one crypto vbdev that should be created on top of a base bdev.
 */
struct bdev_names {
	char			*vbdev_name;	/* name of the vbdev to create */
	char			*bdev_name;	/* base bdev name */

	/* Note, for dev/test we allow use of key in the config file, for production
	 * use, you must use an RPC to specify the key for security reasons.
	 */
	uint8_t			*key;		/* key per bdev */
	char			*drv_name;	/* name of the crypto device driver */
	char			*cipher;	/* AES_CBC or AES_XTS */
	uint8_t			*key2;		/* key #2 for AES_XTS, per bdev */
	TAILQ_ENTRY(bdev_names)	link;		/* linkage on g_bdev_names */
};
static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
170 
/* List of virtual bdevs and associated info for each. We keep the device friendly name here even
 * though it's also in the device struct because we use it early on.
 *
 * The embedded crypto_bdev member is what the bdev layer sees; code in this file recovers the
 * enclosing vbdev_crypto from it with SPDK_CONTAINEROF().
 */
struct vbdev_crypto {
	struct spdk_bdev		*base_bdev;		/* the thing we're attaching to */
	struct spdk_bdev_desc		*base_desc;		/* its descriptor we get from open */
	struct spdk_bdev		crypto_bdev;		/* the crypto virtual bdev */
	uint8_t				*key;			/* key per bdev */
	uint8_t				*key2;			/* for XTS */
	uint8_t				*xts_key;		/* key + key 2 */
	char				*drv_name;		/* name of the crypto device driver */
	char				*cipher;		/* cipher used */
	struct rte_cryptodev_sym_session *session_encrypt;	/* encryption session for this bdev */
	struct rte_cryptodev_sym_session *session_decrypt;	/* decryption session for this bdev */
	struct rte_crypto_sym_xform	cipher_xform;		/* crypto control struct for this bdev */
	TAILQ_ENTRY(vbdev_crypto)	link;			/* linkage on g_vbdev_crypto */
	struct spdk_thread		*thread;		/* thread where base device is opened */
};
static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
190 
/* Shared mempools between all devices on this system. All four are created by
 * vbdev_crypto_init_crypto_drivers(); a non-NULL g_session_mp doubles as the
 * "drivers already initialized" marker for that function.
 */
static struct rte_mempool *g_session_mp = NULL;		/* session headers */
static struct rte_mempool *g_session_mp_priv = NULL;	/* session private data, sized for the largest PMD */
static struct spdk_mempool *g_mbuf_mp = NULL;		/* mbuf mempool */
static struct rte_mempool *g_crypto_op_mp = NULL;	/* crypto operations, must be rte* mempool */
196 
/* For queueing up crypto operations that we can't submit for some reason.
 * The completion poller walks these and retries the enqueue, one op at a time,
 * on the device and queue pair recorded here.
 */
struct vbdev_crypto_op {
	uint8_t					cdev_id;	/* device to resubmit to */
	uint8_t					qp;		/* queue pair to resubmit to */
	struct rte_crypto_op			*crypto_op;	/* the op that still has to be enqueued */
	struct spdk_bdev_io			*bdev_io;	/* bdev_io this op belongs to */
	TAILQ_ENTRY(vbdev_crypto_op)		link;		/* linkage on the channel's queued_cry_ops list */
};
/* Extra per-op private space reserved in the crypto op pool for the struct above. */
#define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op))
206 
/* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code.
 * We store things in here that are needed on a per thread basis like the base_channel for this thread,
 * and the poller for this thread.
 */
struct crypto_io_channel {
	struct spdk_io_channel		*base_ch;		/* IO channel of base device */
	struct spdk_poller		*poller;		/* completion poller */
	struct device_qp		*device_qp;		/* unique device/qp combination for this channel */
	TAILQ_HEAD(, spdk_bdev_io)	pending_cry_ios;	/* outstanding operations to the crypto device */
	struct spdk_io_channel_iter	*iter;			/* used with for_each_channel in reset; non-NULL while a reset is quiescing this channel */
	TAILQ_HEAD(, vbdev_crypto_op)	queued_cry_ops;		/* queued for re-submission to CryptoDev */
};
219 
/* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
 * each IO for us. It is reached through bdev_io->driver_ctx.
 */
struct crypto_bdev_io {
	int cryop_cnt_remaining;			/* counter used when completing crypto ops; the IO is done when it hits 0 */
	struct crypto_io_channel *crypto_ch;		/* need to store for crypto completion handling */
	struct vbdev_crypto *crypto_bdev;		/* the crypto node struct associated with this IO */
	struct spdk_bdev_io *orig_io;			/* the original IO */
	struct spdk_bdev_io *read_io;			/* the read IO we issued */
	int8_t bdev_io_status;				/* the status we'll report back on the bdev IO */
	bool on_pending_list;				/* true once the IO has been put on the channel's pending_cry_ios list */
	/* Used for the single contiguous buffer that serves as the crypto destination target for writes */
	uint64_t aux_num_blocks;			/* num of blocks for the contiguous buffer */
	uint64_t aux_offset_blocks;			/* block offset on media */
	void *aux_buf_raw;				/* raw buffer that the bdev layer gave us for write buffer */
	struct iovec aux_buf_iov;			/* iov representing aligned contig write buffer */

	/* for bdev_io_wait */
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	struct spdk_io_channel *ch;
};
241 
242 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */
243 static int
244 create_vbdev_dev(uint8_t index, uint16_t num_lcores)
245 {
246 	struct vbdev_dev *device;
247 	uint8_t j, cdev_id, cdrv_id;
248 	struct device_qp *dev_qp;
249 	struct device_qp *tmp_qp;
250 	int rc;
251 	TAILQ_HEAD(device_qps, device_qp) *dev_qp_head;
252 
253 	device = calloc(1, sizeof(struct vbdev_dev));
254 	if (!device) {
255 		return -ENOMEM;
256 	}
257 
258 	/* Get details about this device. */
259 	rte_cryptodev_info_get(index, &device->cdev_info);
260 	cdrv_id = device->cdev_info.driver_id;
261 	cdev_id = device->cdev_id = index;
262 
263 	/* QAT_ASYM devices are not supported at this time. */
264 	if (strcmp(device->cdev_info.driver_name, QAT_ASYM) == 0) {
265 		free(device);
266 		return 0;
267 	}
268 
269 	/* Before going any further, make sure we have enough resources for this
270 	 * device type to function.  We need a unique queue pair per core accross each
271 	 * device type to remain lockless....
272 	 */
273 	if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
274 	     device->cdev_info.max_nb_queue_pairs) < num_lcores) {
275 		SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
276 			    device->cdev_info.driver_name);
277 		SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
278 		rc = -EINVAL;
279 		goto err;
280 	}
281 
282 	/* Setup queue pairs. */
283 	struct rte_cryptodev_config conf = {
284 		.nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
285 		.socket_id = SPDK_ENV_SOCKET_ID_ANY
286 	};
287 
288 	rc = rte_cryptodev_configure(cdev_id, &conf);
289 	if (rc < 0) {
290 		SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id);
291 		rc = -EINVAL;
292 		goto err;
293 	}
294 
295 	struct rte_cryptodev_qp_conf qp_conf = {
296 		.nb_descriptors = CRYPTO_QP_DESCRIPTORS,
297 		.mp_session = g_session_mp,
298 		.mp_session_private = g_session_mp_priv,
299 	};
300 
301 	/* Pre-setup all potential qpairs now and assign them in the channel
302 	 * callback. If we were to create them there, we'd have to stop the
303 	 * entire device affecting all other threads that might be using it
304 	 * even on other queue pairs.
305 	 */
306 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
307 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY);
308 		if (rc < 0) {
309 			SPDK_ERRLOG("Failed to setup queue pair %u on "
310 				    "cryptodev %u\n", j, cdev_id);
311 			rc = -EINVAL;
312 			goto err;
313 		}
314 	}
315 
316 	rc = rte_cryptodev_start(cdev_id);
317 	if (rc < 0) {
318 		SPDK_ERRLOG("Failed to start device %u: error %d\n",
319 			    cdev_id, rc);
320 		rc = -EINVAL;
321 		goto err;
322 	}
323 
324 	/* Select the right device/qp list based on driver name
325 	 * or error if it does not exist.
326 	 */
327 	if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
328 		dev_qp_head = (struct device_qps *)&g_device_qp_qat;
329 	} else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) {
330 		dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb;
331 	} else {
332 		rc = -EINVAL;
333 		goto err;
334 	}
335 
336 	/* Build up lists of device/qp combinations per PMD */
337 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
338 		dev_qp = calloc(1, sizeof(struct device_qp));
339 		if (!dev_qp) {
340 			rc = -ENOMEM;
341 			goto err_qp_alloc;
342 		}
343 		dev_qp->device = device;
344 		dev_qp->qp = j;
345 		dev_qp->in_use = false;
346 		if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
347 			g_qat_total_qp++;
348 		}
349 		TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
350 	}
351 
352 	/* Add to our list of available crypto devices. */
353 	TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
354 
355 	return 0;
356 err_qp_alloc:
357 	TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) {
358 		TAILQ_REMOVE(dev_qp_head, dev_qp, link);
359 		free(dev_qp);
360 	}
361 err:
362 	free(device);
363 
364 	return rc;
365 }
366 
367 /* This is called from the module's init function. We setup all crypto devices early on as we are unable
368  * to easily dynamically configure queue pairs after the drivers are up and running.  So, here, we
369  * configure the max capabilities of each device and assign threads to queue pairs as channels are
370  * requested.
371  */
372 static int
373 vbdev_crypto_init_crypto_drivers(void)
374 {
375 	uint8_t cdev_count;
376 	uint8_t cdev_id;
377 	int i, rc = 0;
378 	struct vbdev_dev *device;
379 	struct vbdev_dev *tmp_dev;
380 	struct device_qp *dev_qp;
381 	unsigned int max_sess_size = 0, sess_size;
382 	uint16_t num_lcores = rte_lcore_count();
383 	char aesni_args[32];
384 
385 	/* Only the first call, via RPC or module init should init the crypto drivers. */
386 	if (g_session_mp != NULL) {
387 		return 0;
388 	}
389 
390 	/* We always init AESNI_MB */
391 	snprintf(aesni_args, sizeof(aesni_args), "max_nb_queue_pairs=%d", AESNI_MB_NUM_QP);
392 	rc = rte_vdev_init(AESNI_MB, aesni_args);
393 	if (rc) {
394 		SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
395 		return -EINVAL;
396 	}
397 
398 	/* If we have no crypto devices, there's no reason to continue. */
399 	cdev_count = rte_cryptodev_count();
400 	if (cdev_count == 0) {
401 		return 0;
402 	}
403 
404 	g_mbuf_offset = rte_mbuf_dynfield_register(&rte_mbuf_dynfield_io_context);
405 	if (g_mbuf_offset < 0) {
406 		SPDK_ERRLOG("error registering dynamic field with DPDK\n");
407 		return -EINVAL;
408 	}
409 
410 	/*
411 	 * Create global mempools, shared by all devices regardless of type.
412 	 */
413 
414 	/* First determine max session size, most pools are shared by all the devices,
415 	 * so we need to find the global max sessions size.
416 	 */
417 	for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
418 		sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
419 		if (sess_size > max_sess_size) {
420 			max_sess_size = sess_size;
421 		}
422 	}
423 
424 	g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size,
425 					       SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL,
426 					       NULL, SOCKET_ID_ANY, 0);
427 	if (g_session_mp_priv == NULL) {
428 		SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size);
429 		return -ENOMEM;
430 	}
431 
432 	g_session_mp = rte_cryptodev_sym_session_pool_create(
433 			       "session_mp",
434 			       NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0,
435 			       SOCKET_ID_ANY);
436 	if (g_session_mp == NULL) {
437 		SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
438 		goto error_create_session_mp;
439 		return -ENOMEM;
440 	}
441 
442 	g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
443 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
444 					SPDK_ENV_SOCKET_ID_ANY);
445 	if (g_mbuf_mp == NULL) {
446 		SPDK_ERRLOG("Cannot create mbuf pool\n");
447 		rc = -ENOMEM;
448 		goto error_create_mbuf;
449 	}
450 
451 	/* We use per op private data to store the IV and our own struct
452 	 * for queueing ops.
453 	 */
454 	g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
455 			 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
456 			 NUM_MBUFS,
457 			 POOL_CACHE_SIZE,
458 			 AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH,
459 			 rte_socket_id());
460 
461 	if (g_crypto_op_mp == NULL) {
462 		SPDK_ERRLOG("Cannot create op pool\n");
463 		rc = -ENOMEM;
464 		goto error_create_op;
465 	}
466 
467 	/* Init all devices */
468 	for (i = 0; i < cdev_count; i++) {
469 		rc = create_vbdev_dev(i, num_lcores);
470 		if (rc) {
471 			goto err;
472 		}
473 	}
474 
475 	/* Assign index values to the QAT device qp nodes so that we can
476 	 * assign them for optimal performance.
477 	 */
478 	i = 0;
479 	TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
480 		dev_qp->index = i++;
481 	}
482 
483 	return 0;
484 
485 	/* Error cleanup paths. */
486 err:
487 	TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) {
488 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
489 		free(device);
490 	}
491 	rte_mempool_free(g_crypto_op_mp);
492 	g_crypto_op_mp = NULL;
493 error_create_op:
494 	spdk_mempool_free(g_mbuf_mp);
495 	g_mbuf_mp = NULL;
496 error_create_mbuf:
497 	rte_mempool_free(g_session_mp);
498 	g_session_mp = NULL;
499 error_create_session_mp:
500 	if (g_session_mp_priv != NULL) {
501 		rte_mempool_free(g_session_mp_priv);
502 		g_session_mp_priv = NULL;
503 	}
504 	return rc;
505 }
506 
507 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish
508  * the read on decrypted data. Do that here.
509  */
510 static void
511 _crypto_operation_complete(struct spdk_bdev_io *bdev_io)
512 {
513 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
514 					   crypto_bdev);
515 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
516 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
517 	struct spdk_bdev_io *free_me = io_ctx->read_io;
518 	int rc = 0;
519 
520 	TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link);
521 
522 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
523 
524 		/* Complete the original IO and then free the one that we created
525 		 * as a result of issuing an IO via submit_request.
526 		 */
527 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
528 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
529 		} else {
530 			SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io);
531 			rc = -EINVAL;
532 		}
533 		spdk_bdev_free_io(free_me);
534 
535 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
536 
537 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
538 			/* Write the encrypted data. */
539 			rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
540 						     &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks,
541 						     io_ctx->aux_num_blocks, _complete_internal_write,
542 						     bdev_io);
543 		} else {
544 			SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io);
545 			rc = -EINVAL;
546 		}
547 
548 	} else {
549 		SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n",
550 			    bdev_io->type);
551 		rc = -EINVAL;
552 	}
553 
554 	if (rc) {
555 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
556 	}
557 }
558 
/* Forward declaration; the definition follows crypto_dev_poller().
 * crypto_op selects encryption or decryption and aux_buf is the raw buffer that
 * serves as the destination when encrypting.
 */
static int _crypto_operation(struct spdk_bdev_io *bdev_io,
			     enum rte_crypto_cipher_operation crypto_op,
			     void *aux_buf);
562 
563 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
564  * the device. Then we need to decide if what we've got so far (including previous poller
565  * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
566  * accordingly. This means either completing a read or issuing a new write.
567  */
568 static int
569 crypto_dev_poller(void *args)
570 {
571 	struct crypto_io_channel *crypto_ch = args;
572 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
573 	int i, num_dequeued_ops, num_enqueued_ops;
574 	struct spdk_bdev_io *bdev_io = NULL;
575 	struct crypto_bdev_io *io_ctx = NULL;
576 	struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
577 	struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
578 	int num_mbufs = 0;
579 	struct vbdev_crypto_op *op_to_resubmit;
580 
581 	/* Each run of the poller will get just what the device has available
582 	 * at the moment we call it, we don't check again after draining the
583 	 * first batch.
584 	 */
585 	num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
586 			   dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
587 
588 	/* Check if operation was processed successfully */
589 	for (i = 0; i < num_dequeued_ops; i++) {
590 
591 		/* We don't know the order or association of the crypto ops wrt any
592 		 * particular bdev_io so need to look at each and determine if it's
593 		 * the last one for it's bdev_io or not.
594 		 */
595 		bdev_io = (struct spdk_bdev_io *)*RTE_MBUF_DYNFIELD(dequeued_ops[i]->sym->m_src, g_mbuf_offset,
596 				uint64_t *);
597 		assert(bdev_io != NULL);
598 		io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
599 
600 		if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
601 			SPDK_ERRLOG("error with op %d status %u\n", i,
602 				    dequeued_ops[i]->status);
603 			/* Update the bdev status to error, we'll still process the
604 			 * rest of the crypto ops for this bdev_io though so they
605 			 * aren't left hanging.
606 			 */
607 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
608 		}
609 
610 		assert(io_ctx->cryop_cnt_remaining > 0);
611 
612 		/* Return the associated src and dst mbufs by collecting them into
613 		 * an array that we can use the bulk API to free after the loop.
614 		 */
615 		*RTE_MBUF_DYNFIELD(dequeued_ops[i]->sym->m_src, g_mbuf_offset, uint64_t *) = 0;
616 		mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
617 		if (dequeued_ops[i]->sym->m_dst) {
618 			mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
619 		}
620 
621 		/* done encrypting, complete the bdev_io */
622 		if (--io_ctx->cryop_cnt_remaining == 0) {
623 
624 			/* If we're completing this with an outstanding reset we need
625 			 * to fail it.
626 			 */
627 			if (crypto_ch->iter) {
628 				io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
629 			}
630 
631 			/* Complete the IO */
632 			_crypto_operation_complete(bdev_io);
633 		}
634 	}
635 
636 	/* Now bulk free both mbufs and crypto operations. */
637 	if (num_dequeued_ops > 0) {
638 		rte_mempool_put_bulk(g_crypto_op_mp,
639 				     (void **)dequeued_ops,
640 				     num_dequeued_ops);
641 		assert(num_mbufs > 0);
642 		spdk_mempool_put_bulk(g_mbuf_mp,
643 				      (void **)mbufs_to_free,
644 				      num_mbufs);
645 	}
646 
647 	/* Check if there are any pending crypto ops to process */
648 	while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) {
649 		op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops);
650 		io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx;
651 		num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id,
652 				   op_to_resubmit->qp,
653 				   &op_to_resubmit->crypto_op,
654 				   1);
655 		if (num_enqueued_ops == 1) {
656 			/* Make sure we don't put this on twice as one bdev_io is made up
657 			 * of many crypto ops.
658 			 */
659 			if (io_ctx->on_pending_list == false) {
660 				TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link);
661 				io_ctx->on_pending_list = true;
662 			}
663 			TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link);
664 		} else {
665 			/* if we couldn't get one, just break and try again later. */
666 			break;
667 		}
668 	}
669 
670 	/* If the channel iter is not NULL, we need to continue to poll
671 	 * until the pending list is empty, then we can move on to the
672 	 * next channel.
673 	 */
674 	if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) {
675 		SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch);
676 		spdk_for_each_channel_continue(crypto_ch->iter, 0);
677 		crypto_ch->iter = NULL;
678 	}
679 
680 	return num_dequeued_ops;
681 }
682 
683 /* We're either encrypting on the way down or decrypting on the way back. */
684 static int
685 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op,
686 		  void *aux_buf)
687 {
688 	uint16_t num_enqueued_ops = 0;
689 	uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
690 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
691 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
692 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
693 	uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
694 	uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
695 	int rc;
696 	uint32_t iov_index = 0;
697 	uint32_t allocated = 0;
698 	uint8_t *current_iov = NULL;
699 	uint64_t total_remaining = 0;
700 	uint64_t updated_length, current_iov_remaining = 0;
701 	uint32_t crypto_index = 0;
702 	uint32_t en_offset = 0;
703 	struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
704 	struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
705 	struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
706 	int burst;
707 	struct vbdev_crypto_op *op_to_queue;
708 	uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev);
709 
710 	assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
711 
712 	/* Get the number of source mbufs that we need. These will always be 1:1 because we
713 	 * don't support chaining. The reason we don't is because of our decision to use
714 	 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the
715 	 * op would be > 1 LBA.
716 	 */
717 	rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
718 	if (rc) {
719 		SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
720 		return -ENOMEM;
721 	}
722 
723 	/* Get the same amount but these buffers to describe the encrypted data location (dst). */
724 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
725 		rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
726 		if (rc) {
727 			SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
728 			rc = -ENOMEM;
729 			goto error_get_dst;
730 		}
731 	}
732 
733 #ifdef __clang_analyzer__
734 	/* silence scan-build false positive */
735 	SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000);
736 #endif
737 	/* Allocate crypto operations. */
738 	allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
739 					     RTE_CRYPTO_OP_TYPE_SYMMETRIC,
740 					     crypto_ops, cryop_cnt);
741 	if (allocated < cryop_cnt) {
742 		SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
743 		rc = -ENOMEM;
744 		goto error_get_ops;
745 	}
746 
747 	/* For encryption, we need to prepare a single contiguous buffer as the encryption
748 	 * destination, we'll then pass that along for the write after encryption is done.
749 	 * This is done to avoiding encrypting the provided write buffer which may be
750 	 * undesirable in some use cases.
751 	 */
752 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
753 		io_ctx->aux_buf_iov.iov_len = total_length;
754 		io_ctx->aux_buf_raw = aux_buf;
755 		io_ctx->aux_buf_iov.iov_base  = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1));
756 		io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks;
757 		io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks;
758 	}
759 
760 	/* This value is used in the completion callback to determine when the bdev_io is
761 	 * complete.
762 	 */
763 	io_ctx->cryop_cnt_remaining = cryop_cnt;
764 
765 	/* As we don't support chaining because of a decision to use LBA as IV, construction
766 	 * of crypto operations is straightforward. We build both the op, the mbuf and the
767 	 * dst_mbuf in our local arrays by looping through the length of the bdev IO and
768 	 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each
769 	 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single
770 	 * mbuf per crypto operation.
771 	 */
772 	total_remaining = total_length;
773 	current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
774 	current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
775 	do {
776 		uint8_t *iv_ptr;
777 		uint64_t op_block_offset;
778 
779 		/* Set the mbuf elements address and length. Null out the next pointer. */
780 		src_mbufs[crypto_index]->buf_addr = current_iov;
781 		src_mbufs[crypto_index]->data_len = updated_length = crypto_len;
782 		/* TODO: Make this assignment conditional on QAT usage and add an assert. */
783 		src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length);
784 		src_mbufs[crypto_index]->next = NULL;
785 		/* Store context in every mbuf as we don't know anything about completion order */
786 		*RTE_MBUF_DYNFIELD(src_mbufs[crypto_index], g_mbuf_offset, uint64_t *) = (uint64_t)bdev_io;
787 
788 		/* Set the IV - we use the LBA of the crypto_op */
789 		iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
790 						   IV_OFFSET);
791 		memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
792 		op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
793 		rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
794 
795 		/* Set the data to encrypt/decrypt length */
796 		crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
797 		crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
798 
799 		/* link the mbuf to the crypto op. */
800 		crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
801 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
802 			crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
803 		} else {
804 			crypto_ops[crypto_index]->sym->m_dst = NULL;
805 		}
806 
807 		/* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io
808 		 * that will be used to process the write on completion to the same buffer. Setting
809 		 * up the en_buffer is a little simpler as we know the destination buffer is single IOV.
810 		 */
811 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
812 
813 			/* Set the relevant destination en_mbuf elements. */
814 			dst_mbufs[crypto_index]->buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset;
815 			dst_mbufs[crypto_index]->data_len = updated_length = crypto_len;
816 			/* TODO: Make this assignment conditional on QAT usage and add an assert. */
817 			dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr,
818 							    &updated_length);
819 			crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
820 			en_offset += crypto_len;
821 			dst_mbufs[crypto_index]->next = NULL;
822 
823 			/* Attach the crypto session to the operation */
824 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
825 							      io_ctx->crypto_bdev->session_encrypt);
826 			if (rc) {
827 				rc = -EINVAL;
828 				goto error_attach_session;
829 			}
830 
831 		} else {
832 			/* Attach the crypto session to the operation */
833 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
834 							      io_ctx->crypto_bdev->session_decrypt);
835 			if (rc) {
836 				rc = -EINVAL;
837 				goto error_attach_session;
838 			}
839 
840 
841 		}
842 
843 		/* Subtract our running totals for the op in progress and the overall bdev io */
844 		total_remaining -= crypto_len;
845 		current_iov_remaining -= crypto_len;
846 
847 		/* move our current IOV pointer accordingly. */
848 		current_iov += crypto_len;
849 
850 		/* move on to the next crypto operation */
851 		crypto_index++;
852 
853 		/* If we're done with this IOV, move to the next one. */
854 		if (current_iov_remaining == 0 && total_remaining > 0) {
855 			iov_index++;
856 			current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
857 			current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
858 		}
859 	} while (total_remaining > 0);
860 
861 	/* Enqueue everything we've got but limit by the max number of descriptors we
862 	 * configured the crypto device for.
863 	 */
864 	burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS);
865 	num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
866 			   &crypto_ops[0],
867 			   burst);
868 
869 	/* Add this bdev_io to our outstanding list if any of its crypto ops made it. */
870 	if (num_enqueued_ops > 0) {
871 		TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link);
872 		io_ctx->on_pending_list = true;
873 	}
874 	/* We were unable to enqueue everything but did get some, so need to decide what
875 	 * to do based on the status of the last op.
876 	 */
877 	if (num_enqueued_ops < cryop_cnt) {
878 		switch (crypto_ops[num_enqueued_ops]->status) {
879 		case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED:
880 			/* Queue them up on a linked list to be resubmitted via the poller. */
881 			for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) {
882 				op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index],
883 						uint8_t *, QUEUED_OP_OFFSET);
884 				op_to_queue->cdev_id = cdev_id;
885 				op_to_queue->qp = crypto_ch->device_qp->qp;
886 				op_to_queue->crypto_op = crypto_ops[crypto_index];
887 				op_to_queue->bdev_io = bdev_io;
888 				TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops,
889 						  op_to_queue,
890 						  link);
891 			}
892 			break;
893 		default:
894 			/* For all other statuses, set the io_ctx bdev_io status so that
895 			 * the poller will pick the failure up for the overall bdev status.
896 			 */
897 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
898 			if (num_enqueued_ops == 0) {
899 				/* If nothing was enqueued, but the last one wasn't because of
900 				 * busy, fail it now as the poller won't know anything about it.
901 				 */
902 				_crypto_operation_complete(bdev_io);
903 				rc = -EINVAL;
904 				goto error_attach_session;
905 			}
906 			break;
907 		}
908 	}
909 
910 	return rc;
911 
912 	/* Error cleanup paths. */
913 error_attach_session:
914 error_get_ops:
915 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
916 		spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
917 				      cryop_cnt);
918 	}
919 	if (allocated > 0) {
920 		rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
921 				     allocated);
922 	}
923 error_get_dst:
924 	spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
925 			      cryop_cnt);
926 	return rc;
927 }
928 
929 /* This function is called after all channels have been quiesced following
930  * a bdev reset.
931  */
932 static void
933 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status)
934 {
935 	struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i);
936 
937 	assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios));
938 	assert(io_ctx->orig_io != NULL);
939 
940 	spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
941 }
942 
943 /* This function is called per channel to quiesce IOs before completing a
944  * bdev reset that we received.
945  */
946 static void
947 _ch_quiesce(struct spdk_io_channel_iter *i)
948 {
949 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
950 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
951 
952 	crypto_ch->iter = i;
953 	/* When the poller runs, it will see the non-NULL iter and handle
954 	 * the quiesce.
955 	 */
956 }
957 
958 /* Completion callback for IO that were issued from this bdev other than read/write.
959  * They have their own for readability.
960  */
961 static void
962 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
963 {
964 	struct spdk_bdev_io *orig_io = cb_arg;
965 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
966 
967 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
968 		struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
969 
970 		assert(orig_io == orig_ctx->orig_io);
971 
972 		spdk_bdev_free_io(bdev_io);
973 
974 		spdk_for_each_channel(orig_ctx->crypto_bdev,
975 				      _ch_quiesce,
976 				      orig_ctx,
977 				      _ch_quiesce_done);
978 		return;
979 	}
980 
981 	spdk_bdev_io_complete(orig_io, status);
982 	spdk_bdev_free_io(bdev_io);
983 }
984 
985 /* Completion callback for writes that were issued from this bdev. */
986 static void
987 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
988 {
989 	struct spdk_bdev_io *orig_io = cb_arg;
990 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
991 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
992 
993 	spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw);
994 
995 	spdk_bdev_io_complete(orig_io, status);
996 	spdk_bdev_free_io(bdev_io);
997 }
998 
999 /* Completion callback for reads that were issued from this bdev. */
1000 static void
1001 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
1002 {
1003 	struct spdk_bdev_io *orig_io = cb_arg;
1004 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
1005 
1006 	if (success) {
1007 
1008 		/* Save off this bdev_io so it can be freed after decryption. */
1009 		orig_ctx->read_io = bdev_io;
1010 
1011 		if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) {
1012 			return;
1013 		} else {
1014 			SPDK_ERRLOG("ERROR decrypting\n");
1015 		}
1016 	} else {
1017 		SPDK_ERRLOG("ERROR on read prior to decrypting\n");
1018 	}
1019 
1020 	spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
1021 	spdk_bdev_free_io(bdev_io);
1022 }
1023 
1024 static void
1025 vbdev_crypto_resubmit_io(void *arg)
1026 {
1027 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
1028 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1029 
1030 	vbdev_crypto_submit_request(io_ctx->ch, bdev_io);
1031 }
1032 
1033 static void
1034 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io)
1035 {
1036 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1037 	int rc;
1038 
1039 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
1040 	io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io;
1041 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
1042 
1043 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->crypto_ch->base_ch, &io_ctx->bdev_io_wait);
1044 	if (rc != 0) {
1045 		SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc);
1046 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1047 	}
1048 }
1049 
1050 /* Callback for getting a buf from the bdev pool in the event that the caller passed
1051  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
1052  * beneath us before we're done with it.
1053  */
1054 static void
1055 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1056 		       bool success)
1057 {
1058 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1059 					   crypto_bdev);
1060 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1061 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1062 	int rc;
1063 
1064 	if (!success) {
1065 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1066 		return;
1067 	}
1068 
1069 	rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
1070 				    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
1071 				    bdev_io->u.bdev.num_blocks, _complete_internal_read,
1072 				    bdev_io);
1073 	if (rc != 0) {
1074 		if (rc == -ENOMEM) {
1075 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1076 			io_ctx->ch = ch;
1077 			vbdev_crypto_queue_io(bdev_io);
1078 		} else {
1079 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1080 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1081 		}
1082 	}
1083 }
1084 
1085 /* For encryption we don't want to encrypt the data in place as the host isn't
1086  * expecting us to mangle its data buffers so we need to encrypt into the bdev
1087  * aux buffer, then we can use that as the source for the disk data transfer.
1088  */
1089 static void
1090 crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1091 			void *aux_buf)
1092 {
1093 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1094 	int rc = 0;
1095 
1096 	rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf);
1097 	if (rc != 0) {
1098 		spdk_bdev_io_put_aux_buf(bdev_io, aux_buf);
1099 		if (rc == -ENOMEM) {
1100 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1101 			io_ctx->ch = ch;
1102 			vbdev_crypto_queue_io(bdev_io);
1103 		} else {
1104 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1105 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1106 		}
1107 	}
1108 }
1109 
1110 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto,
1111  * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO
1112  * and call our cpl callback provided below along with the original bdev_io so that we can
1113  * complete it once this IO completes. For crypto operations, we'll either encrypt it first
1114  * (writes) then call back into bdev to submit it or we'll submit a read and then catch it
1115  * on the way back for decryption.
1116  */
1117 static void
1118 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
1119 {
1120 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1121 					   crypto_bdev);
1122 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1123 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1124 	int rc = 0;
1125 
1126 	memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
1127 	io_ctx->crypto_bdev = crypto_bdev;
1128 	io_ctx->crypto_ch = crypto_ch;
1129 	io_ctx->orig_io = bdev_io;
1130 	io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1131 
1132 	switch (bdev_io->type) {
1133 	case SPDK_BDEV_IO_TYPE_READ:
1134 		spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
1135 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
1136 		break;
1137 	case SPDK_BDEV_IO_TYPE_WRITE:
1138 		/* Tell the bdev layer that we need an aux buf in addition to the data
1139 		 * buf already associated with the bdev.
1140 		 */
1141 		spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb);
1142 		break;
1143 	case SPDK_BDEV_IO_TYPE_UNMAP:
1144 		rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1145 					    bdev_io->u.bdev.offset_blocks,
1146 					    bdev_io->u.bdev.num_blocks,
1147 					    _complete_internal_io, bdev_io);
1148 		break;
1149 	case SPDK_BDEV_IO_TYPE_FLUSH:
1150 		rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1151 					    bdev_io->u.bdev.offset_blocks,
1152 					    bdev_io->u.bdev.num_blocks,
1153 					    _complete_internal_io, bdev_io);
1154 		break;
1155 	case SPDK_BDEV_IO_TYPE_RESET:
1156 		rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
1157 				     _complete_internal_io, bdev_io);
1158 		break;
1159 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1160 	default:
1161 		SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
1162 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1163 		return;
1164 	}
1165 
1166 	if (rc != 0) {
1167 		if (rc == -ENOMEM) {
1168 			SPDK_DEBUGLOG(vbdev_crypto, "No memory, queue the IO.\n");
1169 			io_ctx->ch = ch;
1170 			vbdev_crypto_queue_io(bdev_io);
1171 		} else {
1172 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1173 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1174 		}
1175 	}
1176 }
1177 
1178 /* We'll just call the base bdev and let it answer except for WZ command which
1179  * we always say we don't support so that the bdev layer will actually send us
1180  * real writes that we can encrypt.
1181  */
1182 static bool
1183 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1184 {
1185 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1186 
1187 	switch (io_type) {
1188 	case SPDK_BDEV_IO_TYPE_WRITE:
1189 	case SPDK_BDEV_IO_TYPE_UNMAP:
1190 	case SPDK_BDEV_IO_TYPE_RESET:
1191 	case SPDK_BDEV_IO_TYPE_READ:
1192 	case SPDK_BDEV_IO_TYPE_FLUSH:
1193 		return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
1194 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1195 	/* Force the bdev layer to issue actual writes of zeroes so we can
1196 	 * encrypt them as regular writes.
1197 	 */
1198 	default:
1199 		return false;
1200 	}
1201 }
1202 
1203 /* Callback for unregistering the IO device. */
1204 static void
1205 _device_unregister_cb(void *io_device)
1206 {
1207 	struct vbdev_crypto *crypto_bdev = io_device;
1208 
1209 	/* Done with this crypto_bdev. */
1210 	rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt);
1211 	rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt);
1212 	free(crypto_bdev->drv_name);
1213 	if (crypto_bdev->key) {
1214 		memset(crypto_bdev->key, 0, strnlen(crypto_bdev->key, (AES_CBC_KEY_LENGTH + 1)));
1215 		free(crypto_bdev->key);
1216 	}
1217 	if (crypto_bdev->key2) {
1218 		memset(crypto_bdev->key2, 0, strnlen(crypto_bdev->key2, (AES_XTS_KEY_LENGTH + 1)));
1219 		free(crypto_bdev->key2);
1220 	}
1221 	if (crypto_bdev->xts_key) {
1222 		memset(crypto_bdev->xts_key, 0, strnlen(crypto_bdev->xts_key, (AES_XTS_KEY_LENGTH * 2) + 1));
1223 		free(crypto_bdev->xts_key);
1224 	}
1225 	free(crypto_bdev->crypto_bdev.name);
1226 	free(crypto_bdev);
1227 }
1228 
/* Message handler wrapping the bdev close operation; ctx is the base bdev
 * descriptor to close. Used by vbdev_crypto_destruct() to close the
 * descriptor on another thread.
 */
static void
_vbdev_crypto_destruct(void *ctx)
{
	struct spdk_bdev_desc *base_desc = ctx;

	spdk_bdev_close(base_desc);
}
1237 
1238 /* Called after we've unregistered following a hot remove callback.
1239  * Our finish entry point will be called next.
1240  */
1241 static int
1242 vbdev_crypto_destruct(void *ctx)
1243 {
1244 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1245 
1246 	/* Remove this device from the internal list */
1247 	TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
1248 
1249 	/* Unclaim the underlying bdev. */
1250 	spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
1251 
1252 	/* Close the underlying bdev on its same opened thread. */
1253 	if (crypto_bdev->thread && crypto_bdev->thread != spdk_get_thread()) {
1254 		spdk_thread_send_msg(crypto_bdev->thread, _vbdev_crypto_destruct, crypto_bdev->base_desc);
1255 	} else {
1256 		spdk_bdev_close(crypto_bdev->base_desc);
1257 	}
1258 
1259 	/* Unregister the io_device. */
1260 	spdk_io_device_unregister(crypto_bdev, _device_unregister_cb);
1261 
1262 	g_number_of_claimed_volumes--;
1263 
1264 	return 0;
1265 }
1266 
/* Entry point used by upper layers to obtain an IO channel for this bdev.
 * ctx is the vbdev_crypto that was registered as the io_device, so the
 * request is handed straight to the SPDK channel code. Per the original
 * notes, that code allocates the channel together with our crypto_io_channel
 * context and invokes our channel create callback to populate it.
 */
static struct spdk_io_channel *
vbdev_crypto_get_io_channel(void *ctx)
{
	struct vbdev_crypto *io_device = (struct vbdev_crypto *)ctx;
	struct spdk_io_channel *ch = spdk_get_io_channel(io_device);

	return ch;
}
1286 
1287 /* This is the output for bdev_get_bdevs() for this vbdev */
1288 static int
1289 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1290 {
1291 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1292 
1293 	spdk_json_write_name(w, "crypto");
1294 	spdk_json_write_object_begin(w);
1295 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1296 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1297 	spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1298 	spdk_json_write_named_string(w, "key", crypto_bdev->key);
1299 	if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1300 		spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1301 	}
1302 	spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1303 	spdk_json_write_object_end(w);
1304 	return 0;
1305 }
1306 
1307 static int
1308 vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
1309 {
1310 	struct vbdev_crypto *crypto_bdev;
1311 
1312 	TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) {
1313 		spdk_json_write_object_begin(w);
1314 		spdk_json_write_named_string(w, "method", "bdev_crypto_create");
1315 		spdk_json_write_named_object_begin(w, "params");
1316 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1317 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1318 		spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1319 		spdk_json_write_named_string(w, "key", crypto_bdev->key);
1320 		if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1321 			spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1322 		}
1323 		spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1324 		spdk_json_write_object_end(w);
1325 		spdk_json_write_object_end(w);
1326 	}
1327 	return 0;
1328 }
1329 
1330 /* Helper function for the channel creation callback. */
1331 static void
1332 _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
1333 		  struct crypto_io_channel *crypto_ch)
1334 {
1335 	pthread_mutex_lock(&g_device_qp_lock);
1336 	if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
1337 		/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
1338 		 * workload out over the multiple virtual functions in the device. For the devices
1339 		 * where this isn't the case, it doesn't hurt.
1340 		 */
1341 		TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
1342 			if (device_qp->index != g_next_qat_index) {
1343 				continue;
1344 			}
1345 			if (device_qp->in_use == false) {
1346 				crypto_ch->device_qp = device_qp;
1347 				device_qp->in_use = true;
1348 				g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
1349 				break;
1350 			} else {
1351 				/* if the preferred index is used, skip to the next one in this set. */
1352 				g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
1353 			}
1354 		}
1355 	} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
1356 		TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) {
1357 			if (device_qp->in_use == false) {
1358 				crypto_ch->device_qp = device_qp;
1359 				device_qp->in_use = true;
1360 				break;
1361 			}
1362 		}
1363 	}
1364 	pthread_mutex_unlock(&g_device_qp_lock);
1365 }
1366 
1367 /* We provide this callback for the SPDK channel code to create a channel using
1368  * the channel struct we provided in our module get_io_channel() entry point. Here
1369  * we get and save off an underlying base channel of the device below us so that
1370  * we can communicate with the base bdev on a per channel basis. We also register the
1371  * poller used to complete crypto operations from the device.
1372  */
1373 static int
1374 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
1375 {
1376 	struct crypto_io_channel *crypto_ch = ctx_buf;
1377 	struct vbdev_crypto *crypto_bdev = io_device;
1378 	struct device_qp *device_qp = NULL;
1379 
1380 	crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
1381 	crypto_ch->poller = SPDK_POLLER_REGISTER(crypto_dev_poller, crypto_ch, 0);
1382 	crypto_ch->device_qp = NULL;
1383 
1384 	/* Assign a device/qp combination that is unique per channel per PMD. */
1385 	_assign_device_qp(crypto_bdev, device_qp, crypto_ch);
1386 	assert(crypto_ch->device_qp);
1387 
1388 	/* We use this queue to track outstanding IO in our layer. */
1389 	TAILQ_INIT(&crypto_ch->pending_cry_ios);
1390 
1391 	/* We use this to queue up crypto ops when the device is busy. */
1392 	TAILQ_INIT(&crypto_ch->queued_cry_ops);
1393 
1394 	return 0;
1395 }
1396 
1397 /* We provide this callback for the SPDK channel code to destroy a channel
1398  * created with our create callback. We just need to undo anything we did
1399  * when we created.
1400  */
1401 static void
1402 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
1403 {
1404 	struct crypto_io_channel *crypto_ch = ctx_buf;
1405 
1406 	pthread_mutex_lock(&g_device_qp_lock);
1407 	crypto_ch->device_qp->in_use = false;
1408 	pthread_mutex_unlock(&g_device_qp_lock);
1409 
1410 	spdk_poller_unregister(&crypto_ch->poller);
1411 	spdk_put_io_channel(crypto_ch->base_ch);
1412 }
1413 
1414 /* Create the association from the bdev and vbdev name and insert
1415  * on the global list. */
1416 static int
1417 vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
1418 			 const char *crypto_pmd, const char *key,
1419 			 const char *cipher, const char *key2)
1420 {
1421 	struct bdev_names *name;
1422 	int rc, j;
1423 	bool found = false;
1424 
1425 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1426 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
1427 			SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name);
1428 			return -EEXIST;
1429 		}
1430 	}
1431 
1432 	name = calloc(1, sizeof(struct bdev_names));
1433 	if (!name) {
1434 		SPDK_ERRLOG("could not allocate bdev_names\n");
1435 		return -ENOMEM;
1436 	}
1437 
1438 	name->bdev_name = strdup(bdev_name);
1439 	if (!name->bdev_name) {
1440 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
1441 		rc = -ENOMEM;
1442 		goto error_alloc_bname;
1443 	}
1444 
1445 	name->vbdev_name = strdup(vbdev_name);
1446 	if (!name->vbdev_name) {
1447 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
1448 		rc = -ENOMEM;
1449 		goto error_alloc_vname;
1450 	}
1451 
1452 	name->drv_name = strdup(crypto_pmd);
1453 	if (!name->drv_name) {
1454 		SPDK_ERRLOG("could not allocate name->drv_name\n");
1455 		rc = -ENOMEM;
1456 		goto error_alloc_dname;
1457 	}
1458 	for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
1459 		if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
1460 			found = true;
1461 			break;
1462 		}
1463 	}
1464 	if (!found) {
1465 		SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
1466 		rc = -EINVAL;
1467 		goto error_invalid_pmd;
1468 	}
1469 
1470 	name->key = strdup(key);
1471 	if (!name->key) {
1472 		SPDK_ERRLOG("could not allocate name->key\n");
1473 		rc = -ENOMEM;
1474 		goto error_alloc_key;
1475 	}
1476 	if (strnlen(name->key, (AES_CBC_KEY_LENGTH + 1)) != AES_CBC_KEY_LENGTH) {
1477 		SPDK_ERRLOG("invalid AES_CBC key length\n");
1478 		rc = -EINVAL;
1479 		goto error_invalid_key;
1480 	}
1481 
1482 	if (strncmp(cipher, AES_XTS, sizeof(AES_XTS)) == 0) {
1483 		/* To please scan-build, input validation makes sure we can't
1484 		 * have this cipher without providing a key2.
1485 		 */
1486 		name->cipher = AES_XTS;
1487 		assert(key2);
1488 		if (strnlen(key2, (AES_XTS_KEY_LENGTH + 1)) != AES_XTS_KEY_LENGTH) {
1489 			SPDK_ERRLOG("invalid AES_XTS key length\n");
1490 			rc = -EINVAL;
1491 			goto error_invalid_key2;
1492 		}
1493 
1494 		name->key2 = strdup(key2);
1495 		if (!name->key2) {
1496 			SPDK_ERRLOG("could not allocate name->key2\n");
1497 			rc = -ENOMEM;
1498 			goto error_alloc_key2;
1499 		}
1500 	} else if (strncmp(cipher, AES_CBC, sizeof(AES_CBC)) == 0) {
1501 		name->cipher = AES_CBC;
1502 	} else {
1503 		SPDK_ERRLOG("Invalid cipher: %s\n", cipher);
1504 		rc = -EINVAL;
1505 		goto error_cipher;
1506 	}
1507 
1508 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
1509 
1510 	return 0;
1511 
1512 	/* Error cleanup paths. */
1513 error_cipher:
1514 	free(name->key2);
1515 error_alloc_key2:
1516 error_invalid_key2:
1517 error_invalid_key:
1518 	free(name->key);
1519 error_alloc_key:
1520 error_invalid_pmd:
1521 	free(name->drv_name);
1522 error_alloc_dname:
1523 	free(name->vbdev_name);
1524 error_alloc_vname:
1525 	free(name->bdev_name);
1526 error_alloc_bname:
1527 	free(name);
1528 	return rc;
1529 }
1530 
/* RPC entry point for crypto creation. Records the name association and then
 * tries to claim the base bdev. If the claim reports -ENODEV, creation is
 * treated as deferred until the base bdev arrives and 0 is returned; any
 * other result is returned unchanged.
 */
int
create_crypto_disk(const char *bdev_name, const char *vbdev_name,
		   const char *crypto_pmd, const char *key,
		   const char *cipher, const char *key2)
{
	int rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key, cipher, key2);

	if (rc != 0) {
		return rc;
	}

	rc = vbdev_crypto_claim(bdev_name);
	if (rc != -ENODEV) {
		return rc;
	}

	SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
	return 0;
}
1552 
/* Module init entry point, called at driver init time: fully configures both
 * the SW and HW crypto drivers. Returns whatever the driver initialization
 * returns, logging an error when that is non-zero.
 */
static int
vbdev_crypto_init(void)
{
	int rc = vbdev_crypto_init_crypto_drivers();

	if (rc != 0) {
		SPDK_ERRLOG("Error setting up crypto devices\n");
	}

	return rc;
}
1569 
1570 /* Called when the entire module is being torn down. */
1571 static void
1572 vbdev_crypto_finish(void)
1573 {
1574 	struct bdev_names *name;
1575 	struct vbdev_dev *device;
1576 	struct device_qp *dev_qp;
1577 	int rc;
1578 
1579 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
1580 		TAILQ_REMOVE(&g_bdev_names, name, link);
1581 		free(name->drv_name);
1582 		free(name->key);
1583 		free(name->bdev_name);
1584 		free(name->vbdev_name);
1585 		free(name->key2);
1586 		free(name);
1587 	}
1588 
1589 	while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
1590 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
1591 		rte_cryptodev_stop(device->cdev_id);
1592 		rc = rte_cryptodev_close(device->cdev_id);
1593 		assert(rc == 0);
1594 		free(device);
1595 	}
1596 
1597 	rc = rte_vdev_uninit(AESNI_MB);
1598 	if (rc) {
1599 		SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc);
1600 	}
1601 
1602 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_qat))) {
1603 		TAILQ_REMOVE(&g_device_qp_qat, dev_qp, link);
1604 		free(dev_qp);
1605 	}
1606 
1607 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_aesni_mb))) {
1608 		TAILQ_REMOVE(&g_device_qp_aesni_mb, dev_qp, link);
1609 		free(dev_qp);
1610 	}
1611 
1612 	rte_mempool_free(g_crypto_op_mp);
1613 	spdk_mempool_free(g_mbuf_mp);
1614 	rte_mempool_free(g_session_mp);
1615 	if (g_session_mp_priv != NULL) {
1616 		rte_mempool_free(g_session_mp_priv);
1617 	}
1618 }
1619 
1620 /* During init we'll be asked how much memory we'd like passed to us
1621  * in bev_io structures as context. Here's where we specify how
1622  * much context we want per IO.
1623  */
1624 static int
1625 vbdev_crypto_get_ctx_size(void)
1626 {
1627 	return sizeof(struct crypto_bdev_io);
1628 }
1629 
1630 static void
1631 vbdev_crypto_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find)
1632 {
1633 	struct vbdev_crypto *crypto_bdev, *tmp;
1634 
1635 	TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
1636 		if (bdev_find == crypto_bdev->base_bdev) {
1637 			spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
1638 		}
1639 	}
1640 }
1641 
1642 /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */
1643 static void
1644 vbdev_crypto_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1645 				void *event_ctx)
1646 {
1647 	switch (type) {
1648 	case SPDK_BDEV_EVENT_REMOVE:
1649 		vbdev_crypto_base_bdev_hotremove_cb(bdev);
1650 		break;
1651 	default:
1652 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1653 		break;
1654 	}
1655 }
1656 
/* Per-bdev write_config_json entry point. Intentionally a no-op: no config
 * per bdev is needed.
 */
static void
vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	(void)bdev;
	(void)w;
}
1662 
1663 /* When we register our bdev this is how we specify our entry points. */
1664 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
1665 	.destruct		= vbdev_crypto_destruct,
1666 	.submit_request		= vbdev_crypto_submit_request,
1667 	.io_type_supported	= vbdev_crypto_io_type_supported,
1668 	.get_io_channel		= vbdev_crypto_get_io_channel,
1669 	.dump_info_json		= vbdev_crypto_dump_info_json,
1670 	.write_config_json	= vbdev_crypto_write_config_json
1671 };
1672 
1673 static struct spdk_bdev_module crypto_if = {
1674 	.name = "crypto",
1675 	.module_init = vbdev_crypto_init,
1676 	.get_ctx_size = vbdev_crypto_get_ctx_size,
1677 	.examine_config = vbdev_crypto_examine,
1678 	.module_fini = vbdev_crypto_finish,
1679 	.config_json = vbdev_crypto_config_json
1680 };
1681 
1682 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
1683 
1684 static int
1685 vbdev_crypto_claim(const char *bdev_name)
1686 {
1687 	struct bdev_names *name;
1688 	struct vbdev_crypto *vbdev;
1689 	struct vbdev_dev *device;
1690 	struct spdk_bdev *bdev;
1691 	bool found = false;
1692 	int rc = 0;
1693 
1694 	if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
1695 		SPDK_DEBUGLOG(vbdev_crypto, "Reached max number of claimed volumes\n");
1696 		rc = -EINVAL;
1697 		goto error_vbdev_alloc;
1698 	}
1699 	g_number_of_claimed_volumes++;
1700 
1701 	/* Check our list of names from config versus this bdev and if
1702 	 * there's a match, create the crypto_bdev & bdev accordingly.
1703 	 */
1704 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1705 		if (strcmp(name->bdev_name, bdev_name) != 0) {
1706 			continue;
1707 		}
1708 		SPDK_DEBUGLOG(vbdev_crypto, "Match on %s\n", bdev_name);
1709 
1710 		vbdev = calloc(1, sizeof(struct vbdev_crypto));
1711 		if (!vbdev) {
1712 			SPDK_ERRLOG("could not allocate crypto_bdev\n");
1713 			rc = -ENOMEM;
1714 			goto error_vbdev_alloc;
1715 		}
1716 
1717 		vbdev->crypto_bdev.name = strdup(name->vbdev_name);
1718 		if (!vbdev->crypto_bdev.name) {
1719 			SPDK_ERRLOG("could not allocate crypto_bdev name\n");
1720 			rc = -ENOMEM;
1721 			goto error_bdev_name;
1722 		}
1723 
1724 		vbdev->key = strdup(name->key);
1725 		if (!vbdev->key) {
1726 			SPDK_ERRLOG("could not allocate crypto_bdev key\n");
1727 			rc = -ENOMEM;
1728 			goto error_alloc_key;
1729 		}
1730 
1731 		if (name->key2) {
1732 			vbdev->key2 = strdup(name->key2);
1733 			if (!vbdev->key2) {
1734 				SPDK_ERRLOG("could not allocate crypto_bdev key2\n");
1735 				rc = -ENOMEM;
1736 				goto error_alloc_key2;
1737 			}
1738 		}
1739 
1740 		vbdev->drv_name = strdup(name->drv_name);
1741 		if (!vbdev->drv_name) {
1742 			SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
1743 			rc = -ENOMEM;
1744 			goto error_drv_name;
1745 		}
1746 
1747 		vbdev->crypto_bdev.product_name = "crypto";
1748 
1749 		rc = spdk_bdev_open_ext(bdev_name, true, vbdev_crypto_base_bdev_event_cb,
1750 					NULL, &vbdev->base_desc);
1751 		if (rc) {
1752 			if (rc != -ENODEV) {
1753 				SPDK_ERRLOG("could not open bdev %s\n", bdev_name);
1754 			}
1755 			goto error_open;
1756 		}
1757 
1758 		bdev = spdk_bdev_desc_get_bdev(vbdev->base_desc);
1759 		vbdev->base_bdev = bdev;
1760 
1761 		vbdev->crypto_bdev.write_cache = bdev->write_cache;
1762 		vbdev->cipher = AES_CBC;
1763 		if (strcmp(vbdev->drv_name, QAT) == 0) {
1764 			vbdev->crypto_bdev.required_alignment =
1765 				spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
1766 			SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
1767 				       vbdev->crypto_bdev.required_alignment);
1768 			if (strcmp(name->cipher, AES_CBC) == 0) {
1769 				SPDK_NOTICELOG("QAT using cipher: AES_CBC\n");
1770 			} else {
1771 				SPDK_NOTICELOG("QAT using cipher: AES_XTS\n");
1772 				vbdev->cipher = AES_XTS;
1773 				/* DPDK expects they keys to be concatenated together. */
1774 				vbdev->xts_key = calloc(1, (AES_XTS_KEY_LENGTH * 2) + 1);
1775 				if (vbdev->xts_key == NULL) {
1776 					SPDK_ERRLOG("could not allocate memory for XTS key\n");
1777 					rc = -ENOMEM;
1778 					goto error_xts_key;
1779 				}
1780 				memcpy(vbdev->xts_key, vbdev->key, AES_XTS_KEY_LENGTH);
1781 				assert(name->key2);
1782 				memcpy(vbdev->xts_key + AES_XTS_KEY_LENGTH, name->key2, AES_XTS_KEY_LENGTH + 1);
1783 			}
1784 		} else {
1785 			vbdev->crypto_bdev.required_alignment = bdev->required_alignment;
1786 		}
1787 		/* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
1788 		 * in units of blocks.
1789 		 */
1790 		if (bdev->optimal_io_boundary > 0) {
1791 			vbdev->crypto_bdev.optimal_io_boundary =
1792 				spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
1793 		} else {
1794 			vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
1795 		}
1796 		vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
1797 		vbdev->crypto_bdev.blocklen = bdev->blocklen;
1798 		vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
1799 
1800 		/* This is the context that is passed to us when the bdev
1801 		 * layer calls in so we'll save our crypto_bdev node here.
1802 		 */
1803 		vbdev->crypto_bdev.ctxt = vbdev;
1804 		vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
1805 		vbdev->crypto_bdev.module = &crypto_if;
1806 		TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
1807 
1808 		spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
1809 					sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
1810 
1811 		/* Save the thread where the base device is opened */
1812 		vbdev->thread = spdk_get_thread();
1813 
1814 		rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
1815 		if (rc) {
1816 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
1817 			goto error_claim;
1818 		}
1819 
1820 		/* To init the session we have to get the cryptoDev device ID for this vbdev */
1821 		TAILQ_FOREACH(device, &g_vbdev_devs, link) {
1822 			if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) {
1823 				found = true;
1824 				break;
1825 			}
1826 		}
1827 		if (found == false) {
1828 			SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n");
1829 			rc = -EINVAL;
1830 			goto error_cant_find_devid;
1831 		}
1832 
1833 		/* Get sessions. */
1834 		vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp);
1835 		if (NULL == vbdev->session_encrypt) {
1836 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1837 			rc = -EINVAL;
1838 			goto error_session_en_create;
1839 		}
1840 
1841 		vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp);
1842 		if (NULL == vbdev->session_decrypt) {
1843 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1844 			rc = -EINVAL;
1845 			goto error_session_de_create;
1846 		}
1847 
1848 		/* Init our per vbdev xform with the desired cipher options. */
1849 		vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
1850 		vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
1851 		if (strcmp(name->cipher, AES_CBC) == 0) {
1852 			vbdev->cipher_xform.cipher.key.data = vbdev->key;
1853 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
1854 			vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
1855 		} else {
1856 			vbdev->cipher_xform.cipher.key.data = vbdev->xts_key;
1857 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_XTS;
1858 			vbdev->cipher_xform.cipher.key.length = AES_XTS_KEY_LENGTH * 2;
1859 		}
1860 		vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
1861 
1862 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
1863 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
1864 						    &vbdev->cipher_xform,
1865 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1866 		if (rc < 0) {
1867 			SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
1868 			rc = -EINVAL;
1869 			goto error_session_init;
1870 		}
1871 
1872 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
1873 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
1874 						    &vbdev->cipher_xform,
1875 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1876 		if (rc < 0) {
1877 			SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
1878 			rc = -EINVAL;
1879 			goto error_session_init;
1880 		}
1881 
1882 		rc = spdk_bdev_register(&vbdev->crypto_bdev);
1883 		if (rc < 0) {
1884 			SPDK_ERRLOG("ERROR trying to register bdev\n");
1885 			rc = -EINVAL;
1886 			goto error_bdev_register;
1887 		}
1888 		SPDK_DEBUGLOG(vbdev_crypto, "registered io_device and virtual bdev for: %s\n",
1889 			      name->vbdev_name);
1890 		break;
1891 	}
1892 
1893 	return rc;
1894 
1895 	/* Error cleanup paths. */
1896 error_bdev_register:
1897 error_session_init:
1898 	rte_cryptodev_sym_session_free(vbdev->session_decrypt);
1899 error_session_de_create:
1900 	rte_cryptodev_sym_session_free(vbdev->session_encrypt);
1901 error_session_en_create:
1902 error_cant_find_devid:
1903 error_claim:
1904 	spdk_bdev_close(vbdev->base_desc);
1905 	TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
1906 	spdk_io_device_unregister(vbdev, NULL);
1907 	free(vbdev->xts_key);
1908 error_xts_key:
1909 error_open:
1910 	free(vbdev->drv_name);
1911 error_drv_name:
1912 	free(vbdev->key2);
1913 error_alloc_key2:
1914 	free(vbdev->key);
1915 error_alloc_key:
1916 	free(vbdev->crypto_bdev.name);
1917 error_bdev_name:
1918 	free(vbdev);
1919 error_vbdev_alloc:
1920 	g_number_of_claimed_volumes--;
1921 	return rc;
1922 }
1923 
1924 /* RPC entry for deleting a crypto vbdev. */
1925 void
1926 delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
1927 		   void *cb_arg)
1928 {
1929 	struct bdev_names *name;
1930 
1931 	if (!bdev || bdev->module != &crypto_if) {
1932 		cb_fn(cb_arg, -ENODEV);
1933 		return;
1934 	}
1935 
1936 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
1937 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
1938 	 * unless the underlying bdev was hot-removed.
1939 	 */
1940 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1941 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
1942 			TAILQ_REMOVE(&g_bdev_names, name, link);
1943 			free(name->bdev_name);
1944 			free(name->vbdev_name);
1945 			free(name->drv_name);
1946 			free(name->key);
1947 			free(name->key2);
1948 			free(name);
1949 			break;
1950 		}
1951 	}
1952 
1953 	/* Additional cleanup happens in the destruct callback. */
1954 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
1955 }
1956 
1957 /* Because we specified this function in our crypto bdev function table when we
1958  * registered our crypto bdev, we'll get this call anytime a new bdev shows up.
1959  * Here we need to decide if we care about it and if so what to do. We
1960  * parsed the config file at init so we check the new bdev against the list
1961  * we built up at that time and if the user configured us to attach to this
1962  * bdev, here's where we do it.
1963  */
1964 static void
1965 vbdev_crypto_examine(struct spdk_bdev *bdev)
1966 {
1967 	vbdev_crypto_claim(spdk_bdev_get_name(bdev));
1968 	spdk_bdev_module_examine_done(&crypto_if);
1969 }
1970 
1971 SPDK_LOG_REGISTER_COMPONENT(vbdev_crypto)
1972