xref: /spdk/module/bdev/crypto/vbdev_crypto.c (revision 2505b938627faacb6bb99780c3d4595f7ebd323b)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vbdev_crypto.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/conf.h"
38 #include "spdk/endian.h"
39 #include "spdk/io_channel.h"
40 #include "spdk/bdev_module.h"
41 #include "spdk_internal/log.h"
42 
43 #include <rte_config.h>
44 #include <rte_version.h>
45 #include <rte_bus_vdev.h>
46 #include <rte_crypto.h>
47 #include <rte_cryptodev.h>
48 #include <rte_cryptodev_pmd.h>
49 
50 /* To add support for new device types, follow the examples of the following...
51  * Note that the string names are defined by the DPDK PMD in question so be
52  * sure to use the exact names.
53  */
54 #define MAX_NUM_DRV_TYPES 2
55 
56 /* The VF spread is the number of queue pairs between virtual functions, we use this to
57  * load balance the QAT device.
58  */
59 #define QAT_VF_SPREAD 32
60 static uint8_t g_qat_total_qp = 0;
61 static uint8_t g_next_qat_index;
62 
63 const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
64 
65 /* Global list of available crypto devices. */
66 struct vbdev_dev {
67 	struct rte_cryptodev_info	cdev_info;	/* includes device friendly name */
68 	uint8_t				cdev_id;	/* identifier for the device */
69 	TAILQ_ENTRY(vbdev_dev)		link;
70 };
71 static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
72 
73 /* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD
74  * so that we can optimize per PMD where it makes sense. For example, with QAT there is an optimal
75  * pattern for assigning queue pairs, whereas with AESNI there is not.
76  */
struct device_qp {
	/* ptr to crypto device */
	struct vbdev_dev		*device;
	/* queue pair for this node */
	uint8_t				qp;
	/* whether this node is in use or not */
	bool				in_use;
	/* used by QAT to load balance placement of qpairs */
	uint8_t				index;
	/* linkage on one of the per-PMD lists below */
	TAILQ_ENTRY(device_qp)		link;
};

/* One list of device/qp nodes per supported PMD, plus the lock declared alongside them. */
static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb);
static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
87 
88 
89 /* In order to limit the number of resources we need to do one crypto
90  * operation per LBA (we use LBA as IV), we tell the bdev layer that
91  * our max IO size is something reasonable. Units here are in bytes.
92  */
93 #define CRYPTO_MAX_IO		(64 * 1024)
94 
95 /* This controls how many ops will be dequeued from the crypto driver in one run
96  * of the poller. It is mainly a performance knob as it effectively determines how
97  * much work the poller has to do.  However even that can vary between crypto drivers
98  * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the
99  * QAT driver just dequeues what has been completed already.
100  */
101 #define MAX_DEQUEUE_BURST_SIZE	64
102 
103 /* When enqueueing, we need to supply the crypto driver with an array of pointers to
104  * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO
105  * value in conjunction with the other defines to make sure we're not using crazy amounts
106  * of memory. All of these numbers can and probably should be adjusted based on the
107  * workload. By default we'll use the worst case (smallest) block size for the
108  * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B
109  * blocks would give us an enqueue array size of 128.
110  */
111 #define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)
112 
113 /* The number of MBUFS we need must be a power of two and to support other small IOs
114  * in addition to the limits mentioned above, we go to the next power of two. It is
115  * a big number because it is one mempool for source and destination mbufs. It may
116  * need to be bigger to support multiple crypto drivers at once.
117  */
118 #define NUM_MBUFS		32768
119 #define POOL_CACHE_SIZE		256
120 #define MAX_CRYPTO_VOLUMES	128
121 #define NUM_SESSIONS		(2 * MAX_CRYPTO_VOLUMES)
122 #define SESS_MEMPOOL_CACHE_SIZE 0
123 uint8_t g_number_of_claimed_volumes = 0;
124 
125 /* This is the max number of IOs we can supply to any crypto device QP at one time.
126  * It can vary between drivers.
127  */
128 #define CRYPTO_QP_DESCRIPTORS	2048
129 
130 /* Specific to AES_CBC. */
131 #define AES_CBC_IV_LENGTH	16
132 #define AES_CBC_KEY_LENGTH	16
133 
134 /* Common for supported devices. */
135 #define IV_OFFSET            (sizeof(struct rte_crypto_op) + \
136 				sizeof(struct rte_crypto_sym_op))
137 #define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH)
138 
139 static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
140 static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
141 static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
142 static void vbdev_crypto_examine(struct spdk_bdev *bdev);
143 static int vbdev_crypto_claim(struct spdk_bdev *bdev);
144 static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
145 
/*
 * List of crypto_bdev names and their base bdevs via configuration file.
 */
struct bdev_names {
	/* name of the vbdev to create */
	char			*vbdev_name;
	/* base bdev name */
	char			*bdev_name;

	/*
	 * Key per bdev.
	 *
	 * Note, for dev/test we allow use of key in the config file, for production
	 * use, you must use an RPC to specify the key for security reasons.
	 */
	uint8_t			*key;
	/* name of the crypto device driver */
	char			*drv_name;
	/* linkage on g_bdev_names */
	TAILQ_ENTRY(bdev_names)	link;
};

static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
159 
160 /* List of virtual bdevs and associated info for each. We keep the device friendly name here even
161  * though its also in the device struct because we use it early on.
162  */
163 struct vbdev_crypto {
164 	struct spdk_bdev		*base_bdev;		/* the thing we're attaching to */
165 	struct spdk_bdev_desc		*base_desc;		/* its descriptor we get from open */
166 	struct spdk_bdev		crypto_bdev;		/* the crypto virtual bdev */
167 	uint8_t				*key;			/* key per bdev */
168 	char				*drv_name;		/* name of the crypto device driver */
169 	struct rte_cryptodev_sym_session *session_encrypt;	/* encryption session for this bdev */
170 	struct rte_cryptodev_sym_session *session_decrypt;	/* decryption session for this bdev */
171 	struct rte_crypto_sym_xform	cipher_xform;		/* crypto control struct for this bdev */
172 	TAILQ_ENTRY(vbdev_crypto)	link;
173 };
174 static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
175 
176 /* Shared mempools between all devices on this system */
177 static struct rte_mempool *g_session_mp = NULL;
178 static struct rte_mempool *g_session_mp_priv = NULL;
179 static struct spdk_mempool *g_mbuf_mp = NULL;		/* mbuf mempool */
180 static struct rte_mempool *g_crypto_op_mp = NULL;	/* crypto operations, must be rte* mempool */
181 
/*
 * Bookkeeping record for a crypto operation that we could not submit for some
 * reason and therefore queue up.
 */
struct vbdev_crypto_op {
	/* crypto device id the op is to be enqueued on */
	uint8_t					cdev_id;
	/* queue pair the op is to be enqueued on */
	uint8_t					qp;
	/* the crypto operation itself */
	struct rte_crypto_op			*crypto_op;
	/* the bdev_io this operation belongs to */
	struct spdk_bdev_io			*bdev_io;
	/* linkage on a channel's queued_cry_ops list */
	TAILQ_ENTRY(vbdev_crypto_op)		link;
};

/* Size in bytes of one queued-op record. */
#define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op))
191 
/*
 * The crypto vbdev channel struct. It is allocated and freed on my behalf by the
 * io channel code. We store things in here that are needed on per thread basis
 * like the base_channel for this thread, and the poller for this thread.
 */
struct crypto_io_channel {
	/* IO channel of base device */
	struct spdk_io_channel		*base_ch;
	/* completion poller */
	struct spdk_poller		*poller;
	/* unique device/qp combination for this channel */
	struct device_qp		*device_qp;
	/* outstanding operations to the crypto device */
	TAILQ_HEAD(, spdk_bdev_io)	pending_cry_ios;
	/* used with for_each_channel in reset */
	struct spdk_io_channel_iter	*iter;
	/* queued for re-submission to CryptoDev */
	TAILQ_HEAD(, vbdev_crypto_op)	queued_cry_ops;
};
204 
205 /* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
206  * each IO for us.
207  */
208 struct crypto_bdev_io {
209 	int cryop_cnt_remaining;			/* counter used when completing crypto ops */
210 	struct crypto_io_channel *crypto_ch;		/* need to store for crypto completion handling */
211 	struct vbdev_crypto *crypto_bdev;		/* the crypto node struct associated with this IO */
212 	struct spdk_bdev_io *orig_io;			/* the original IO */
213 	struct spdk_bdev_io *read_io;			/* the read IO we issued */
214 	int8_t bdev_io_status;				/* the status we'll report back on the bdev IO */
215 	bool on_pending_list;
216 	/* Used for the single contiguous buffer that serves as the crypto destination target for writes */
217 	uint64_t aux_num_blocks;			/* num of blocks for the contiguous buffer */
218 	uint64_t aux_offset_blocks;			/* block offset on media */
219 	void *aux_buf_raw;				/* raw buffer that the bdev layer gave us for write buffer */
220 	struct iovec aux_buf_iov;			/* iov representing aligned contig write buffer */
221 
222 	/* for bdev_io_wait */
223 	struct spdk_bdev_io_wait_entry bdev_io_wait;
224 	struct spdk_io_channel *ch;
225 };
226 
227 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */
228 static int
229 create_vbdev_dev(uint8_t index, uint16_t num_lcores)
230 {
231 	struct vbdev_dev *device;
232 	uint8_t j, cdev_id, cdrv_id;
233 	struct device_qp *dev_qp;
234 	struct device_qp *tmp_qp;
235 	int rc;
236 	TAILQ_HEAD(device_qps, device_qp) *dev_qp_head;
237 
238 	device = calloc(1, sizeof(struct vbdev_dev));
239 	if (!device) {
240 		return -ENOMEM;
241 	}
242 
243 	/* Get details about this device. */
244 	rte_cryptodev_info_get(index, &device->cdev_info);
245 	cdrv_id = device->cdev_info.driver_id;
246 	cdev_id = device->cdev_id = index;
247 
248 	/* Before going any further, make sure we have enough resources for this
249 	 * device type to function.  We need a unique queue pair per core accross each
250 	 * device type to remain lockless....
251 	 */
252 	if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
253 	     device->cdev_info.max_nb_queue_pairs) < num_lcores) {
254 		SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
255 			    device->cdev_info.driver_name);
256 		SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
257 		rc = -EINVAL;
258 		goto err;
259 	}
260 
261 	/* Setup queue pairs. */
262 	struct rte_cryptodev_config conf = {
263 		.nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
264 		.socket_id = SPDK_ENV_SOCKET_ID_ANY
265 	};
266 
267 	rc = rte_cryptodev_configure(cdev_id, &conf);
268 	if (rc < 0) {
269 		SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id);
270 		rc = -EINVAL;
271 		goto err;
272 	}
273 
274 	struct rte_cryptodev_qp_conf qp_conf = {
275 		.nb_descriptors = CRYPTO_QP_DESCRIPTORS,
276 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
277 		.mp_session = g_session_mp,
278 		.mp_session_private = g_session_mp_priv,
279 #endif
280 	};
281 
282 	/* Pre-setup all potential qpairs now and assign them in the channel
283 	 * callback. If we were to create them there, we'd have to stop the
284 	 * entire device affecting all other threads that might be using it
285 	 * even on other queue pairs.
286 	 */
287 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
288 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
289 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY);
290 #else
291 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY,
292 						    g_session_mp);
293 #endif
294 
295 		if (rc < 0) {
296 			SPDK_ERRLOG("Failed to setup queue pair %u on "
297 				    "cryptodev %u\n", j, cdev_id);
298 			rc = -EINVAL;
299 			goto err;
300 		}
301 	}
302 
303 	rc = rte_cryptodev_start(cdev_id);
304 	if (rc < 0) {
305 		SPDK_ERRLOG("Failed to start device %u: error %d\n",
306 			    cdev_id, rc);
307 		rc = -EINVAL;
308 		goto err;
309 	}
310 
311 	/* Select the right device/qp list based on driver name
312 	 * or error if it does not exist.
313 	 */
314 	if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
315 		dev_qp_head = (struct device_qps *)&g_device_qp_qat;
316 	} else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) {
317 		dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb;
318 	} else {
319 		rc = -EINVAL;
320 		goto err;
321 	}
322 
323 	/* Build up lists of device/qp combinations per PMD */
324 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
325 		dev_qp = calloc(1, sizeof(struct device_qp));
326 		if (!dev_qp) {
327 			rc = -ENOMEM;
328 			goto err_qp_alloc;
329 		}
330 		dev_qp->device = device;
331 		dev_qp->qp = j;
332 		dev_qp->in_use = false;
333 		if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
334 			g_qat_total_qp++;
335 		}
336 		TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
337 	}
338 
339 	/* Add to our list of available crypto devices. */
340 	TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
341 
342 	return 0;
343 err_qp_alloc:
344 	TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) {
345 		TAILQ_REMOVE(dev_qp_head, dev_qp, link);
346 		free(dev_qp);
347 	}
348 err:
349 	free(device);
350 
351 	return rc;
352 }
353 
354 /* This is called from the module's init function. We setup all crypto devices early on as we are unable
355  * to easily dynamically configure queue pairs after the drivers are up and running.  So, here, we
356  * configure the max capabilities of each device and assign threads to queue pairs as channels are
357  * requested.
358  */
359 static int
360 vbdev_crypto_init_crypto_drivers(void)
361 {
362 	uint8_t cdev_count;
363 	uint8_t cdev_id;
364 	int i, rc = 0;
365 	struct vbdev_dev *device;
366 	struct vbdev_dev *tmp_dev;
367 	struct device_qp *dev_qp;
368 	unsigned int max_sess_size = 0, sess_size;
369 	uint16_t num_lcores = rte_lcore_count();
370 
371 	/* Only the first call, via RPC or module init should init the crypto drivers. */
372 	if (g_session_mp != NULL) {
373 		return 0;
374 	}
375 
376 	/* We always init AESNI_MB */
377 	rc = rte_vdev_init(AESNI_MB, NULL);
378 	if (rc) {
379 		SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
380 		return -EINVAL;
381 	}
382 
383 	/* If we have no crypto devices, there's no reason to continue. */
384 	cdev_count = rte_cryptodev_count();
385 	if (cdev_count == 0) {
386 		return 0;
387 	}
388 
389 	/*
390 	 * Create global mempools, shared by all devices regardless of type.
391 	 */
392 
393 	/* First determine max session size, most pools are shared by all the devices,
394 	 * so we need to find the global max sessions size.
395 	 */
396 	for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
397 		sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
398 		if (sess_size > max_sess_size) {
399 			max_sess_size = sess_size;
400 		}
401 	}
402 
403 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
404 	g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size,
405 					       SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL,
406 					       NULL, SOCKET_ID_ANY, 0);
407 	if (g_session_mp_priv == NULL) {
408 		SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size);
409 		return -ENOMEM;
410 	}
411 
412 	g_session_mp = rte_cryptodev_sym_session_pool_create(
413 			       "session_mp",
414 			       NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0,
415 			       SOCKET_ID_ANY);
416 #else
417 	g_session_mp = rte_mempool_create("session_mp", NUM_SESSIONS, max_sess_size,
418 					  SESS_MEMPOOL_CACHE_SIZE,
419 					  0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
420 #endif
421 	if (g_session_mp == NULL) {
422 		SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
423 		goto error_create_session_mp;
424 		return -ENOMEM;
425 	}
426 
427 	g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
428 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
429 					SPDK_ENV_SOCKET_ID_ANY);
430 	if (g_mbuf_mp == NULL) {
431 		SPDK_ERRLOG("Cannot create mbuf pool\n");
432 		rc = -ENOMEM;
433 		goto error_create_mbuf;
434 	}
435 
436 	/* We use per op private data to store the IV and our own struct
437 	 * for queueing ops.
438 	 */
439 	g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
440 			 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
441 			 NUM_MBUFS,
442 			 POOL_CACHE_SIZE,
443 			 AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH,
444 			 rte_socket_id());
445 
446 	if (g_crypto_op_mp == NULL) {
447 		SPDK_ERRLOG("Cannot create op pool\n");
448 		rc = -ENOMEM;
449 		goto error_create_op;
450 	}
451 
452 	/* Init all devices */
453 	for (i = 0; i < cdev_count; i++) {
454 		rc = create_vbdev_dev(i, num_lcores);
455 		if (rc) {
456 			goto err;
457 		}
458 	}
459 
460 	/* Assign index values to the QAT device qp nodes so that we can
461 	 * assign them for optimal performance.
462 	 */
463 	i = 0;
464 	TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
465 		dev_qp->index = i++;
466 	}
467 
468 	return 0;
469 
470 	/* Error cleanup paths. */
471 err:
472 	TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) {
473 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
474 		free(device);
475 	}
476 	rte_mempool_free(g_crypto_op_mp);
477 	g_crypto_op_mp = NULL;
478 error_create_op:
479 	spdk_mempool_free(g_mbuf_mp);
480 	g_mbuf_mp = NULL;
481 error_create_mbuf:
482 	rte_mempool_free(g_session_mp);
483 	g_session_mp = NULL;
484 error_create_session_mp:
485 	if (g_session_mp_priv != NULL) {
486 		rte_mempool_free(g_session_mp_priv);
487 		g_session_mp_priv = NULL;
488 	}
489 	return rc;
490 }
491 
492 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish
493  * the read on decrypted data. Do that here.
494  */
495 static void
496 _crypto_operation_complete(struct spdk_bdev_io *bdev_io)
497 {
498 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
499 					   crypto_bdev);
500 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
501 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
502 	struct spdk_bdev_io *free_me = io_ctx->read_io;
503 	int rc = 0;
504 
505 	TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link);
506 
507 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
508 
509 		/* Complete the original IO and then free the one that we created
510 		 * as a result of issuing an IO via submit_request.
511 		 */
512 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
513 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
514 		} else {
515 			SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io);
516 			rc = -EINVAL;
517 		}
518 		spdk_bdev_free_io(free_me);
519 
520 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
521 
522 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
523 			/* Write the encrypted data. */
524 			rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
525 						     &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks,
526 						     io_ctx->aux_num_blocks, _complete_internal_write,
527 						     bdev_io);
528 		} else {
529 			SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io);
530 			rc = -EINVAL;
531 		}
532 
533 	} else {
534 		SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n",
535 			    bdev_io->type);
536 		rc = -EINVAL;
537 	}
538 
539 	if (rc) {
540 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
541 	}
542 }
543 
544 static int _crypto_operation(struct spdk_bdev_io *bdev_io,
545 			     enum rte_crypto_cipher_operation crypto_op,
546 			     void *aux_buf);
547 
548 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
549  * the device. Then we need to decide if what we've got so far (including previous poller
550  * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
551  * accordingly. This means either completing a read or issuing a new write.
552  */
553 static int
554 crypto_dev_poller(void *args)
555 {
556 	struct crypto_io_channel *crypto_ch = args;
557 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
558 	int i, num_dequeued_ops, num_enqueued_ops;
559 	struct spdk_bdev_io *bdev_io = NULL;
560 	struct crypto_bdev_io *io_ctx = NULL;
561 	struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
562 	struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
563 	int num_mbufs = 0;
564 	struct vbdev_crypto_op *op_to_resubmit;
565 
566 	/* Each run of the poller will get just what the device has available
567 	 * at the moment we call it, we don't check again after draining the
568 	 * first batch.
569 	 */
570 	num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
571 			   dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
572 
573 	/* Check if operation was processed successfully */
574 	for (i = 0; i < num_dequeued_ops; i++) {
575 
576 		/* We don't know the order or association of the crypto ops wrt any
577 		 * partiular bdev_io so need to look at each and determine if it's
578 		 * the last one for it's bdev_io or not.
579 		 */
580 		bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata;
581 		assert(bdev_io != NULL);
582 		io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
583 
584 		if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
585 			SPDK_ERRLOG("error with op %d status %u\n", i,
586 				    dequeued_ops[i]->status);
587 			/* Update the bdev status to error, we'll still process the
588 			 * rest of the crypto ops for this bdev_io though so they
589 			 * aren't left hanging.
590 			 */
591 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
592 		}
593 
594 		assert(io_ctx->cryop_cnt_remaining > 0);
595 
596 		/* Return the associated src and dst mbufs by collecting them into
597 		 * an array that we can use the bulk API to free after the loop.
598 		 */
599 		dequeued_ops[i]->sym->m_src->userdata = NULL;
600 		mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
601 		if (dequeued_ops[i]->sym->m_dst) {
602 			mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
603 		}
604 
605 		/* done encrypting, complete the bdev_io */
606 		if (--io_ctx->cryop_cnt_remaining == 0) {
607 
608 			/* If we're completing this with an outstanding reset we need
609 			 * to fail it.
610 			 */
611 			if (crypto_ch->iter) {
612 				bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
613 			}
614 
615 			/* Complete the IO */
616 			_crypto_operation_complete(bdev_io);
617 		}
618 	}
619 
620 	/* Now bulk free both mbufs and crypto operations. */
621 	if (num_dequeued_ops > 0) {
622 		rte_mempool_put_bulk(g_crypto_op_mp,
623 				     (void **)dequeued_ops,
624 				     num_dequeued_ops);
625 		assert(num_mbufs > 0);
626 		spdk_mempool_put_bulk(g_mbuf_mp,
627 				      (void **)mbufs_to_free,
628 				      num_mbufs);
629 	}
630 
631 	/* Check if there are any pending crypto ops to process */
632 	while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) {
633 		op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops);
634 		io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx;
635 		num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id,
636 				   op_to_resubmit->qp,
637 				   &op_to_resubmit->crypto_op,
638 				   1);
639 		if (num_enqueued_ops == 1) {
640 			/* Make sure we don't put this on twice as one bdev_io is made up
641 			 * of many crypto ops.
642 			 */
643 			if (io_ctx->on_pending_list == false) {
644 				TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link);
645 				io_ctx->on_pending_list = true;
646 			}
647 			TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link);
648 		} else {
649 			/* if we couldn't get one, just break and try again later. */
650 			break;
651 		}
652 	}
653 
654 	/* If the channel iter is not NULL, we need to continue to poll
655 	 * until the pending list is empty, then we can move on to the
656 	 * next channel.
657 	 */
658 	if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) {
659 		SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch);
660 		spdk_for_each_channel_continue(crypto_ch->iter, 0);
661 		crypto_ch->iter = NULL;
662 	}
663 
664 	return num_dequeued_ops;
665 }
666 
667 /* We're either encrypting on the way down or decrypting on the way back. */
668 static int
669 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op,
670 		  void *aux_buf)
671 {
672 	uint16_t num_enqueued_ops = 0;
673 	uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
674 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
675 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
676 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
677 	uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
678 	uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
679 	int rc;
680 	uint32_t iov_index = 0;
681 	uint32_t allocated = 0;
682 	uint8_t *current_iov = NULL;
683 	uint64_t total_remaining = 0;
684 	uint64_t updated_length, current_iov_remaining = 0;
685 	uint32_t crypto_index = 0;
686 	uint32_t en_offset = 0;
687 	struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
688 	struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
689 	struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
690 	int burst;
691 	struct vbdev_crypto_op *op_to_queue;
692 	uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev);
693 
694 	assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
695 
696 	/* Get the number of source mbufs that we need. These will always be 1:1 because we
697 	 * don't support chaining. The reason we don't is because of our decision to use
698 	 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the
699 	 * op would be > 1 LBA.
700 	 */
701 	rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
702 	if (rc) {
703 		SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
704 		return -ENOMEM;
705 	}
706 
707 	/* Get the same amount but these buffers to describe the encrypted data location (dst). */
708 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
709 		rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
710 		if (rc) {
711 			SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
712 			rc = -ENOMEM;
713 			goto error_get_dst;
714 		}
715 	}
716 
717 #ifdef __clang_analyzer__
718 	/* silence scan-build false positive */
719 	SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000);
720 #endif
721 	/* Allocate crypto operations. */
722 	allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
723 					     RTE_CRYPTO_OP_TYPE_SYMMETRIC,
724 					     crypto_ops, cryop_cnt);
725 	if (allocated < cryop_cnt) {
726 		SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
727 		rc = -ENOMEM;
728 		goto error_get_ops;
729 	}
730 
731 	/* For encryption, we need to prepare a single contiguous buffer as the encryption
732 	 * destination, we'll then pass that along for the write after encryption is done.
733 	 * This is done to avoiding encrypting the provided write buffer which may be
734 	 * undesirable in some use cases.
735 	 */
736 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
737 		io_ctx->aux_buf_iov.iov_len = total_length;
738 		io_ctx->aux_buf_raw = aux_buf;
739 		io_ctx->aux_buf_iov.iov_base  = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1));
740 		io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks;
741 		io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks;
742 	}
743 
744 	/* This value is used in the completion callback to determine when the bdev_io is
745 	 * complete.
746 	 */
747 	io_ctx->cryop_cnt_remaining = cryop_cnt;
748 
749 	/* As we don't support chaining because of a decision to use LBA as IV, construction
750 	 * of crypto operations is straightforward. We build both the op, the mbuf and the
751 	 * dst_mbuf in our local arrays by looping through the length of the bdev IO and
752 	 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each
753 	 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single
754 	 * mbuf per crypto operation.
755 	 */
756 	total_remaining = total_length;
757 	current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
758 	current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
759 	do {
760 		uint8_t *iv_ptr;
761 		uint64_t op_block_offset;
762 
763 		/* Set the mbuf elements address and length. Null out the next pointer. */
764 		src_mbufs[crypto_index]->buf_addr = current_iov;
765 		src_mbufs[crypto_index]->data_len = updated_length = crypto_len;
766 		/* TODO: Make this assignment conditional on QAT usage and add an assert. */
767 		src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length);
768 		src_mbufs[crypto_index]->next = NULL;
769 		/* Store context in every mbuf as we don't know anything about completion order */
770 		src_mbufs[crypto_index]->userdata = bdev_io;
771 
772 		/* Set the IV - we use the LBA of the crypto_op */
773 		iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
774 						   IV_OFFSET);
775 		memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
776 		op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
777 		rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
778 
779 		/* Set the data to encrypt/decrypt length */
780 		crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
781 		crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
782 
783 		/* link the mbuf to the crypto op. */
784 		crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
785 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
786 			crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
787 		} else {
788 			crypto_ops[crypto_index]->sym->m_dst = NULL;
789 		}
790 
791 		/* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io
792 		 * that will be used to process the write on completion to the same buffer. Setting
793 		 * up the en_buffer is a little simpler as we know the destination buffer is single IOV.
794 		 */
795 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
796 
797 			/* Set the relevant destination en_mbuf elements. */
798 			dst_mbufs[crypto_index]->buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset;
799 			dst_mbufs[crypto_index]->data_len = updated_length = crypto_len;
800 			/* TODO: Make this assignment conditional on QAT usage and add an assert. */
801 			dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr,
802 							    &updated_length);
803 			crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
804 			en_offset += crypto_len;
805 			dst_mbufs[crypto_index]->next = NULL;
806 
807 			/* Attach the crypto session to the operation */
808 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
809 							      io_ctx->crypto_bdev->session_encrypt);
810 			if (rc) {
811 				rc = -EINVAL;
812 				goto error_attach_session;
813 			}
814 
815 		} else {
816 			/* Attach the crypto session to the operation */
817 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
818 							      io_ctx->crypto_bdev->session_decrypt);
819 			if (rc) {
820 				rc = -EINVAL;
821 				goto error_attach_session;
822 			}
823 
824 
825 		}
826 
827 		/* Subtract our running totals for the op in progress and the overall bdev io */
828 		total_remaining -= crypto_len;
829 		current_iov_remaining -= crypto_len;
830 
831 		/* move our current IOV pointer accordingly. */
832 		current_iov += crypto_len;
833 
834 		/* move on to the next crypto operation */
835 		crypto_index++;
836 
837 		/* If we're done with this IOV, move to the next one. */
838 		if (current_iov_remaining == 0 && total_remaining > 0) {
839 			iov_index++;
840 			current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
841 			current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
842 		}
843 	} while (total_remaining > 0);
844 
845 	/* Enqueue everything we've got but limit by the max number of descriptors we
846 	 * configured the crypto device for.
847 	 */
848 	burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS);
849 	num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
850 			   &crypto_ops[0],
851 			   burst);
852 
853 	/* Add this bdev_io to our outstanding list if any of its crypto ops made it. */
854 	if (num_enqueued_ops > 0) {
855 		TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link);
856 		io_ctx->on_pending_list = true;
857 	}
858 	/* We were unable to enqueue everything but did get some, so need to decide what
859 	 * to do based on the status of the last op.
860 	 */
861 	if (num_enqueued_ops < cryop_cnt) {
862 		switch (crypto_ops[num_enqueued_ops]->status) {
863 		case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED:
864 			/* Queue them up on a linked list to be resubmitted via the poller. */
865 			for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) {
866 				op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index],
867 						uint8_t *, QUEUED_OP_OFFSET);
868 				op_to_queue->cdev_id = cdev_id;
869 				op_to_queue->qp = crypto_ch->device_qp->qp;
870 				op_to_queue->crypto_op = crypto_ops[crypto_index];
871 				op_to_queue->bdev_io = bdev_io;
872 				TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops,
873 						  op_to_queue,
874 						  link);
875 			}
876 			break;
877 		default:
878 			/* For all other statuses, set the io_ctx bdev_io status so that
879 			 * the poller will pick the failure up for the overall bdev status.
880 			 */
881 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
882 			if (num_enqueued_ops == 0) {
883 				/* If nothing was enqueued, but the last one wasn't because of
884 				 * busy, fail it now as the poller won't know anything about it.
885 				 */
886 				_crypto_operation_complete(bdev_io);
887 				rc = -EINVAL;
888 				goto error_attach_session;
889 			}
890 			break;
891 		}
892 	}
893 
894 	return rc;
895 
896 	/* Error cleanup paths. */
897 error_attach_session:
898 error_get_ops:
899 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
900 		spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
901 				      cryop_cnt);
902 	}
903 	if (allocated > 0) {
904 		rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
905 				     allocated);
906 	}
907 error_get_dst:
908 	spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
909 			      cryop_cnt);
910 	return rc;
911 }
912 
913 /* This function is called after all channels have been quiesced following
914  * a bdev reset.
915  */
916 static void
917 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status)
918 {
919 	struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i);
920 
921 	assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios));
922 	assert(io_ctx->orig_io != NULL);
923 
924 	spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
925 }
926 
927 /* This function is called per channel to quiesce IOs before completing a
928  * bdev reset that we received.
929  */
930 static void
931 _ch_quiesce(struct spdk_io_channel_iter *i)
932 {
933 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
934 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
935 
936 	crypto_ch->iter = i;
937 	/* When the poller runs, it will see the non-NULL iter and handle
938 	 * the quiesce.
939 	 */
940 }
941 
942 /* Completion callback for IO that were issued from this bdev other than read/write.
943  * They have their own for readability.
944  */
945 static void
946 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
947 {
948 	struct spdk_bdev_io *orig_io = cb_arg;
949 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
950 
951 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
952 		struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
953 
954 		assert(orig_io == orig_ctx->orig_io);
955 
956 		spdk_bdev_free_io(bdev_io);
957 
958 		spdk_for_each_channel(orig_ctx->crypto_bdev,
959 				      _ch_quiesce,
960 				      orig_ctx,
961 				      _ch_quiesce_done);
962 		return;
963 	}
964 
965 	spdk_bdev_io_complete(orig_io, status);
966 	spdk_bdev_free_io(bdev_io);
967 }
968 
969 /* Completion callback for writes that were issued from this bdev. */
970 static void
971 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
972 {
973 	struct spdk_bdev_io *orig_io = cb_arg;
974 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
975 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
976 
977 	spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw);
978 
979 	spdk_bdev_io_complete(orig_io, status);
980 	spdk_bdev_free_io(bdev_io);
981 }
982 
983 /* Completion callback for reads that were issued from this bdev. */
984 static void
985 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
986 {
987 	struct spdk_bdev_io *orig_io = cb_arg;
988 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
989 
990 	if (success) {
991 
992 		/* Save off this bdev_io so it can be freed after decryption. */
993 		orig_ctx->read_io = bdev_io;
994 
995 		if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) {
996 			return;
997 		} else {
998 			SPDK_ERRLOG("ERROR decrypting\n");
999 		}
1000 	} else {
1001 		SPDK_ERRLOG("ERROR on read prior to decrypting\n");
1002 	}
1003 
1004 	spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
1005 	spdk_bdev_free_io(bdev_io);
1006 }
1007 
1008 static void
1009 vbdev_crypto_resubmit_io(void *arg)
1010 {
1011 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
1012 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1013 
1014 	vbdev_crypto_submit_request(io_ctx->ch, bdev_io);
1015 }
1016 
1017 static void
1018 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io)
1019 {
1020 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1021 	int rc;
1022 
1023 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
1024 	io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io;
1025 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
1026 
1027 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->ch, &io_ctx->bdev_io_wait);
1028 	if (rc != 0) {
1029 		SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc);
1030 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1031 	}
1032 }
1033 
1034 /* Callback for getting a buf from the bdev pool in the event that the caller passed
1035  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
1036  * beneath us before we're done with it.
1037  */
1038 static void
1039 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1040 		       bool success)
1041 {
1042 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1043 					   crypto_bdev);
1044 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1045 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1046 	int rc;
1047 
1048 	if (!success) {
1049 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1050 		return;
1051 	}
1052 
1053 	rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
1054 				    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
1055 				    bdev_io->u.bdev.num_blocks, _complete_internal_read,
1056 				    bdev_io);
1057 	if (rc != 0) {
1058 		if (rc == -ENOMEM) {
1059 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1060 			io_ctx->ch = ch;
1061 			vbdev_crypto_queue_io(bdev_io);
1062 		} else {
1063 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1064 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1065 		}
1066 	}
1067 }
1068 
1069 /* For encryption we don't want to encrypt the data in place as the host isn't
1070  * expecting us to mangle its data buffers so we need to encrypt into the bdev
1071  * aux buffer, then we can use that as the source for the disk data transfer.
1072  */
1073 static void
1074 crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1075 			void *aux_buf)
1076 {
1077 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1078 	int rc = 0;
1079 
1080 	rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf);
1081 	if (rc != 0) {
1082 		spdk_bdev_io_put_aux_buf(bdev_io, aux_buf);
1083 		if (rc == -ENOMEM) {
1084 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1085 			io_ctx->ch = ch;
1086 			vbdev_crypto_queue_io(bdev_io);
1087 		} else {
1088 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1089 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1090 		}
1091 	}
1092 }
1093 
1094 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto,
1095  * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO
1096  * and call our cpl callback provided below along with the original bdev_io so that we can
1097  * complete it once this IO completes. For crypto operations, we'll either encrypt it first
1098  * (writes) then call back into bdev to submit it or we'll submit a read and then catch it
1099  * on the way back for decryption.
1100  */
1101 static void
1102 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
1103 {
1104 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1105 					   crypto_bdev);
1106 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1107 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1108 	int rc = 0;
1109 
1110 	memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
1111 	io_ctx->crypto_bdev = crypto_bdev;
1112 	io_ctx->crypto_ch = crypto_ch;
1113 	io_ctx->orig_io = bdev_io;
1114 	io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1115 
1116 	switch (bdev_io->type) {
1117 	case SPDK_BDEV_IO_TYPE_READ:
1118 		spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
1119 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
1120 		break;
1121 	case SPDK_BDEV_IO_TYPE_WRITE:
1122 		/* Tell the bdev layer that we need an aux buf in addition to the data
1123 		 * buf already associated with the bdev.
1124 		 */
1125 		spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb);
1126 		break;
1127 	case SPDK_BDEV_IO_TYPE_UNMAP:
1128 		rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1129 					    bdev_io->u.bdev.offset_blocks,
1130 					    bdev_io->u.bdev.num_blocks,
1131 					    _complete_internal_io, bdev_io);
1132 		break;
1133 	case SPDK_BDEV_IO_TYPE_FLUSH:
1134 		rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1135 					    bdev_io->u.bdev.offset_blocks,
1136 					    bdev_io->u.bdev.num_blocks,
1137 					    _complete_internal_io, bdev_io);
1138 		break;
1139 	case SPDK_BDEV_IO_TYPE_RESET:
1140 		rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
1141 				     _complete_internal_io, bdev_io);
1142 		break;
1143 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1144 	default:
1145 		SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
1146 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1147 		return;
1148 	}
1149 
1150 	if (rc != 0) {
1151 		if (rc == -ENOMEM) {
1152 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1153 			io_ctx->ch = ch;
1154 			vbdev_crypto_queue_io(bdev_io);
1155 		} else {
1156 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1157 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1158 		}
1159 	}
1160 }
1161 
1162 /* We'll just call the base bdev and let it answer except for WZ command which
1163  * we always say we don't support so that the bdev layer will actually send us
1164  * real writes that we can encrypt.
1165  */
1166 static bool
1167 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1168 {
1169 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1170 
1171 	switch (io_type) {
1172 	case SPDK_BDEV_IO_TYPE_WRITE:
1173 	case SPDK_BDEV_IO_TYPE_UNMAP:
1174 	case SPDK_BDEV_IO_TYPE_RESET:
1175 	case SPDK_BDEV_IO_TYPE_READ:
1176 	case SPDK_BDEV_IO_TYPE_FLUSH:
1177 		return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
1178 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1179 	/* Force the bdev layer to issue actual writes of zeroes so we can
1180 	 * encrypt them as regular writes.
1181 	 */
1182 	default:
1183 		return false;
1184 	}
1185 }
1186 
1187 /* Callback for unregistering the IO device. */
1188 static void
1189 _device_unregister_cb(void *io_device)
1190 {
1191 	struct vbdev_crypto *crypto_bdev = io_device;
1192 
1193 	/* Done with this crypto_bdev. */
1194 	rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt);
1195 	rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt);
1196 	free(crypto_bdev->drv_name);
1197 	free(crypto_bdev->key);
1198 	free(crypto_bdev->crypto_bdev.name);
1199 	free(crypto_bdev);
1200 }
1201 
1202 /* Called after we've unregistered following a hot remove callback.
1203  * Our finish entry point will be called next.
1204  */
1205 static int
1206 vbdev_crypto_destruct(void *ctx)
1207 {
1208 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1209 
1210 	/* Remove this device from the internal list */
1211 	TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
1212 
1213 	/* Unclaim the underlying bdev. */
1214 	spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
1215 
1216 	/* Close the underlying bdev. */
1217 	spdk_bdev_close(crypto_bdev->base_desc);
1218 
1219 	/* Unregister the io_device. */
1220 	spdk_io_device_unregister(crypto_bdev, _device_unregister_cb);
1221 
1222 	g_number_of_claimed_volumes--;
1223 
1224 	return 0;
1225 }
1226 
/* get_io_channel entry point for upper layers that want to talk to this
 * bdev. The context is the one we provided when the crypto vbdev was created,
 * i.e. the address of one of our context nodes, which is also the io_device.
 *
 * The IO channel code allocates a channel made up of the SPDK channel
 * structure plus our crypto_io_channel struct (the size we passed in when
 * registering the IO device) and then calls our channel create callback to
 * populate the elements we need.
 */
static struct spdk_io_channel *
vbdev_crypto_get_io_channel(void *ctx)
{
	struct vbdev_crypto *node = ctx;

	return spdk_get_io_channel(node);
}
1246 
1247 /* This is the output for bdev_get_bdevs() for this vbdev */
1248 static int
1249 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1250 {
1251 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1252 
1253 	spdk_json_write_name(w, "crypto");
1254 	spdk_json_write_object_begin(w);
1255 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1256 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1257 	spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1258 	spdk_json_write_named_string(w, "key", crypto_bdev->key);
1259 	spdk_json_write_object_end(w);
1260 	return 0;
1261 }
1262 
1263 static int
1264 vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
1265 {
1266 	struct vbdev_crypto *crypto_bdev;
1267 
1268 	TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) {
1269 		spdk_json_write_object_begin(w);
1270 		spdk_json_write_named_string(w, "method", "bdev_crypto_create");
1271 		spdk_json_write_named_object_begin(w, "params");
1272 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1273 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1274 		spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1275 		spdk_json_write_named_string(w, "key", crypto_bdev->key);
1276 		spdk_json_write_object_end(w);
1277 		spdk_json_write_object_end(w);
1278 	}
1279 	return 0;
1280 }
1281 
1282 /* Helper function for the channel creation callback. */
1283 static void
1284 _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
1285 		  struct crypto_io_channel *crypto_ch)
1286 {
1287 	pthread_mutex_lock(&g_device_qp_lock);
1288 	if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
1289 		/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
1290 		 * workload out over the multiple virtual functions in the device. For the devices
1291 		 * where this isn't the case, it doesn't hurt.
1292 		 */
1293 		TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
1294 			if (device_qp->index != g_next_qat_index) {
1295 				continue;
1296 			}
1297 			if (device_qp->in_use == false) {
1298 				crypto_ch->device_qp = device_qp;
1299 				device_qp->in_use = true;
1300 				g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
1301 				break;
1302 			} else {
1303 				/* if the preferred index is used, skip to the next one in this set. */
1304 				g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
1305 			}
1306 		}
1307 	} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
1308 		TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) {
1309 			if (device_qp->in_use == false) {
1310 				crypto_ch->device_qp = device_qp;
1311 				device_qp->in_use = true;
1312 				break;
1313 			}
1314 		}
1315 	}
1316 	pthread_mutex_unlock(&g_device_qp_lock);
1317 }
1318 
1319 /* We provide this callback for the SPDK channel code to create a channel using
1320  * the channel struct we provided in our module get_io_channel() entry point. Here
1321  * we get and save off an underlying base channel of the device below us so that
1322  * we can communicate with the base bdev on a per channel basis. We also register the
1323  * poller used to complete crypto operations from the device.
1324  */
1325 static int
1326 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
1327 {
1328 	struct crypto_io_channel *crypto_ch = ctx_buf;
1329 	struct vbdev_crypto *crypto_bdev = io_device;
1330 	struct device_qp *device_qp = NULL;
1331 
1332 	crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
1333 	crypto_ch->poller = spdk_poller_register(crypto_dev_poller, crypto_ch, 0);
1334 	crypto_ch->device_qp = NULL;
1335 
1336 	/* Assign a device/qp combination that is unique per channel per PMD. */
1337 	_assign_device_qp(crypto_bdev, device_qp, crypto_ch);
1338 	assert(crypto_ch->device_qp);
1339 
1340 	/* We use this queue to track outstanding IO in our layer. */
1341 	TAILQ_INIT(&crypto_ch->pending_cry_ios);
1342 
1343 	/* We use this to queue up crypto ops when the device is busy. */
1344 	TAILQ_INIT(&crypto_ch->queued_cry_ops);
1345 
1346 	return 0;
1347 }
1348 
1349 /* We provide this callback for the SPDK channel code to destroy a channel
1350  * created with our create callback. We just need to undo anything we did
1351  * when we created.
1352  */
1353 static void
1354 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
1355 {
1356 	struct crypto_io_channel *crypto_ch = ctx_buf;
1357 
1358 	pthread_mutex_lock(&g_device_qp_lock);
1359 	crypto_ch->device_qp->in_use = false;
1360 	pthread_mutex_unlock(&g_device_qp_lock);
1361 
1362 	spdk_poller_unregister(&crypto_ch->poller);
1363 	spdk_put_io_channel(crypto_ch->base_ch);
1364 }
1365 
1366 /* Create the association from the bdev and vbdev name and insert
1367  * on the global list. */
1368 static int
1369 vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
1370 			 const char *crypto_pmd, const char *key)
1371 {
1372 	struct bdev_names *name;
1373 	int rc, j;
1374 	bool found = false;
1375 
1376 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1377 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
1378 			SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name);
1379 			return -EEXIST;
1380 		}
1381 	}
1382 
1383 	name = calloc(1, sizeof(struct bdev_names));
1384 	if (!name) {
1385 		SPDK_ERRLOG("could not allocate bdev_names\n");
1386 		return -ENOMEM;
1387 	}
1388 
1389 	name->bdev_name = strdup(bdev_name);
1390 	if (!name->bdev_name) {
1391 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
1392 		rc = -ENOMEM;
1393 		goto error_alloc_bname;
1394 	}
1395 
1396 	name->vbdev_name = strdup(vbdev_name);
1397 	if (!name->vbdev_name) {
1398 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
1399 		rc = -ENOMEM;
1400 		goto error_alloc_vname;
1401 	}
1402 
1403 	name->drv_name = strdup(crypto_pmd);
1404 	if (!name->drv_name) {
1405 		SPDK_ERRLOG("could not allocate name->drv_name\n");
1406 		rc = -ENOMEM;
1407 		goto error_alloc_dname;
1408 	}
1409 	for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
1410 		if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
1411 			found = true;
1412 			break;
1413 		}
1414 	}
1415 	if (!found) {
1416 		SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
1417 		rc = -EINVAL;
1418 		goto error_invalid_pmd;
1419 	}
1420 
1421 	name->key = strdup(key);
1422 	if (!name->key) {
1423 		SPDK_ERRLOG("could not allocate name->key\n");
1424 		rc = -ENOMEM;
1425 		goto error_alloc_key;
1426 	}
1427 	if (strlen(name->key) != AES_CBC_KEY_LENGTH) {
1428 		SPDK_ERRLOG("invalid AES_CCB key length\n");
1429 		rc = -EINVAL;
1430 		goto error_invalid_key;
1431 	}
1432 
1433 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
1434 
1435 	return 0;
1436 
1437 	/* Error cleanup paths. */
1438 error_invalid_key:
1439 	free(name->key);
1440 error_alloc_key:
1441 error_invalid_pmd:
1442 	free(name->drv_name);
1443 error_alloc_dname:
1444 	free(name->vbdev_name);
1445 error_alloc_vname:
1446 	free(name->bdev_name);
1447 error_alloc_bname:
1448 	free(name);
1449 	return rc;
1450 }
1451 
/* RPC entry point for crypto vbdev creation.
 *
 * Records the base bdev / vbdev association first. If the base bdev does not
 * exist yet, creation is deferred until it arrives and 0 is returned.
 * Otherwise the base bdev is claimed right away.
 *
 * Returns 0 on success or deferral, otherwise the error reported by
 * vbdev_crypto_insert_name() or vbdev_crypto_claim().
 * NOTE(review): when the claim fails, the name inserted above stays on the
 * list - confirm whether that is intended.
 */
int
create_crypto_disk(const char *bdev_name, const char *vbdev_name,
		   const char *crypto_pmd, const char *key)
{
	struct spdk_bdev *base;
	int rc;

	base = spdk_bdev_get_by_name(bdev_name);

	rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key);
	if (rc != 0) {
		return rc;
	}

	if (base == NULL) {
		SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
		return 0;
	}

	return vbdev_crypto_claim(base);
}
1479 
/* Module init entry point. Fully initializes the crypto drivers, then parses
 * the "crypto" section of the config file (if present) to prepare for
 * examine calls.
 *
 * Each "CRY" entry carries four values, in order: base bdev name, crypto
 * vbdev name, key, driver type. Every entry is recorded through
 * vbdev_crypto_insert_name().
 *
 * Returns 0 on success (including when there is no "crypto" section),
 * -EINVAL when an entry is missing a value, or the error from driver setup
 * or from vbdev_crypto_insert_name().
 */
static int
vbdev_crypto_init(void)
{
	struct spdk_conf_section *sp;
	const char *conf_bdev_name;
	const char *conf_vbdev_name;
	const char *key;
	const char *crypto_pmd;
	int i;
	int rc;

	/* Fully configure both SW and HW drivers. */
	rc = vbdev_crypto_init_crypto_drivers();
	if (rc) {
		SPDK_ERRLOG("Error setting up crypto devices\n");
		return rc;
	}

	sp = spdk_conf_find_section(NULL, "crypto");
	if (sp == NULL) {
		return 0;
	}

	/* Walk the CRY entries until the section runs out of them. */
	for (i = 0; spdk_conf_section_get_nval(sp, "CRY", i) != NULL; i++) {
		conf_bdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 0);
		if (conf_bdev_name == NULL) {
			SPDK_ERRLOG("crypto configuration missing bdev name\n");
			return -EINVAL;
		}

		conf_vbdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 1);
		if (conf_vbdev_name == NULL) {
			SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n");
			return -EINVAL;
		}

		key = spdk_conf_section_get_nmval(sp, "CRY", i, 2);
		if (key == NULL) {
			SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n");
			return -EINVAL;
		}
		SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n");

		crypto_pmd = spdk_conf_section_get_nmval(sp, "CRY", i, 3);
		if (crypto_pmd == NULL) {
			SPDK_ERRLOG("crypto configuration missing driver type\n");
			return -EINVAL;
		}

		rc = vbdev_crypto_insert_name(conf_bdev_name, conf_vbdev_name,
					      crypto_pmd, key);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}
1546 
1547 /* Called when the entire module is being torn down. */
1548 static void
1549 vbdev_crypto_finish(void)
1550 {
1551 	struct bdev_names *name;
1552 	struct vbdev_dev *device;
1553 	struct device_qp *dev_qp;
1554 	unsigned i;
1555 	int rc;
1556 
1557 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
1558 		TAILQ_REMOVE(&g_bdev_names, name, link);
1559 		free(name->drv_name);
1560 		free(name->key);
1561 		free(name->bdev_name);
1562 		free(name->vbdev_name);
1563 		free(name);
1564 	}
1565 
1566 	while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
1567 		struct rte_cryptodev *rte_dev;
1568 
1569 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
1570 		rte_cryptodev_stop(device->cdev_id);
1571 
1572 		assert(device->cdev_id < RTE_CRYPTO_MAX_DEVS);
1573 		rte_dev = &rte_cryptodevs[device->cdev_id];
1574 
1575 		if (rte_dev->dev_ops->queue_pair_release != NULL) {
1576 			for (i = 0; i < device->cdev_info.max_nb_queue_pairs; i++) {
1577 				rte_dev->dev_ops->queue_pair_release(rte_dev, i);
1578 			}
1579 		}
1580 		free(device);
1581 	}
1582 	rc = rte_vdev_uninit(AESNI_MB);
1583 	if (rc) {
1584 		SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc);
1585 	}
1586 
1587 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_qat))) {
1588 		TAILQ_REMOVE(&g_device_qp_qat, dev_qp, link);
1589 		free(dev_qp);
1590 	}
1591 
1592 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_aesni_mb))) {
1593 		TAILQ_REMOVE(&g_device_qp_aesni_mb, dev_qp, link);
1594 		free(dev_qp);
1595 	}
1596 
1597 	rte_mempool_free(g_crypto_op_mp);
1598 	spdk_mempool_free(g_mbuf_mp);
1599 	rte_mempool_free(g_session_mp);
1600 	if (g_session_mp_priv != NULL) {
1601 		rte_mempool_free(g_session_mp_priv);
1602 	}
1603 }
1604 
1605 /* During init we'll be asked how much memory we'd like passed to us
1606  * in bev_io structures as context. Here's where we specify how
1607  * much context we want per IO.
1608  */
1609 static int
1610 vbdev_crypto_get_ctx_size(void)
1611 {
1612 	return sizeof(struct crypto_bdev_io);
1613 }
1614 
1615 /* Called when SPDK wants to save the current config of this vbdev module to
1616  * a file.
1617  */
1618 static void
1619 vbdev_crypto_get_spdk_running_config(FILE *fp)
1620 {
1621 	struct bdev_names *names = NULL;
1622 	fprintf(fp, "\n[crypto]\n");
1623 	TAILQ_FOREACH(names, &g_bdev_names, link) {
1624 		fprintf(fp, "  crypto %s %s ", names->bdev_name, names->vbdev_name);
1625 		fprintf(fp, "\n");
1626 	}
1627 
1628 	fprintf(fp, "\n");
1629 }
1630 
1631 /* Called when the underlying base bdev goes away. */
1632 static void
1633 vbdev_crypto_examine_hotremove_cb(void *ctx)
1634 {
1635 	struct vbdev_crypto *crypto_bdev, *tmp;
1636 	struct spdk_bdev *bdev_find = ctx;
1637 
1638 	TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
1639 		if (bdev_find == crypto_bdev->base_bdev) {
1640 			spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
1641 		}
1642 	}
1643 }
1644 
/* Per-bdev write_config_json entry point. Intentionally a no-op. */
static void
vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* Nothing to write: no per-bdev config is needed. */
}
1650 
1651 /* When we register our bdev this is how we specify our entry points. */
1652 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
1653 	.destruct		= vbdev_crypto_destruct,
1654 	.submit_request		= vbdev_crypto_submit_request,
1655 	.io_type_supported	= vbdev_crypto_io_type_supported,
1656 	.get_io_channel		= vbdev_crypto_get_io_channel,
1657 	.dump_info_json		= vbdev_crypto_dump_info_json,
1658 	.write_config_json	= vbdev_crypto_write_config_json
1659 };
1660 
1661 static struct spdk_bdev_module crypto_if = {
1662 	.name = "crypto",
1663 	.module_init = vbdev_crypto_init,
1664 	.config_text = vbdev_crypto_get_spdk_running_config,
1665 	.get_ctx_size = vbdev_crypto_get_ctx_size,
1666 	.examine_config = vbdev_crypto_examine,
1667 	.module_fini = vbdev_crypto_finish,
1668 	.config_json = vbdev_crypto_config_json
1669 };
1670 
1671 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
1672 
1673 static int
1674 vbdev_crypto_claim(struct spdk_bdev *bdev)
1675 {
1676 	struct bdev_names *name;
1677 	struct vbdev_crypto *vbdev;
1678 	struct vbdev_dev *device;
1679 	bool found = false;
1680 	int rc = 0;
1681 
1682 	if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
1683 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Reached max number of claimed volumes\n");
1684 		rc = -EINVAL;
1685 		goto error_vbdev_alloc;
1686 	}
1687 	g_number_of_claimed_volumes++;
1688 
1689 	/* Check our list of names from config versus this bdev and if
1690 	 * there's a match, create the crypto_bdev & bdev accordingly.
1691 	 */
1692 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1693 		if (strcmp(name->bdev_name, bdev->name) != 0) {
1694 			continue;
1695 		}
1696 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Match on %s\n", bdev->name);
1697 
1698 		vbdev = calloc(1, sizeof(struct vbdev_crypto));
1699 		if (!vbdev) {
1700 			SPDK_ERRLOG("could not allocate crypto_bdev\n");
1701 			rc = -ENOMEM;
1702 			goto error_vbdev_alloc;
1703 		}
1704 
1705 		/* The base bdev that we're attaching to. */
1706 		vbdev->base_bdev = bdev;
1707 		vbdev->crypto_bdev.name = strdup(name->vbdev_name);
1708 		if (!vbdev->crypto_bdev.name) {
1709 			SPDK_ERRLOG("could not allocate crypto_bdev name\n");
1710 			rc = -ENOMEM;
1711 			goto error_bdev_name;
1712 		}
1713 
1714 		vbdev->key = strdup(name->key);
1715 		if (!vbdev->key) {
1716 			SPDK_ERRLOG("could not allocate crypto_bdev key\n");
1717 			rc = -ENOMEM;
1718 			goto error_alloc_key;
1719 		}
1720 
1721 		vbdev->drv_name = strdup(name->drv_name);
1722 		if (!vbdev->drv_name) {
1723 			SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
1724 			rc = -ENOMEM;
1725 			goto error_drv_name;
1726 		}
1727 
1728 		vbdev->crypto_bdev.product_name = "crypto";
1729 		vbdev->crypto_bdev.write_cache = bdev->write_cache;
1730 		if (strcmp(vbdev->drv_name, QAT) == 0) {
1731 			vbdev->crypto_bdev.required_alignment =
1732 				spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
1733 			SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
1734 				       vbdev->crypto_bdev.required_alignment);
1735 		} else {
1736 			vbdev->crypto_bdev.required_alignment = bdev->required_alignment;
1737 		}
1738 		/* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
1739 		 * in units of blocks.
1740 		 */
1741 		if (bdev->optimal_io_boundary > 0) {
1742 			vbdev->crypto_bdev.optimal_io_boundary =
1743 				spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
1744 		} else {
1745 			vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
1746 		}
1747 		vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
1748 		vbdev->crypto_bdev.blocklen = bdev->blocklen;
1749 		vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
1750 
1751 		/* This is the context that is passed to us when the bdev
1752 		 * layer calls in so we'll save our crypto_bdev node here.
1753 		 */
1754 		vbdev->crypto_bdev.ctxt = vbdev;
1755 		vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
1756 		vbdev->crypto_bdev.module = &crypto_if;
1757 		TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
1758 
1759 		spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
1760 					sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
1761 
1762 		rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb,
1763 				    bdev, &vbdev->base_desc);
1764 		if (rc) {
1765 			SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
1766 			goto error_open;
1767 		}
1768 
1769 		rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
1770 		if (rc) {
1771 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
1772 			goto error_claim;
1773 		}
1774 
1775 		/* To init the session we have to get the cryptoDev device ID for this vbdev */
1776 		TAILQ_FOREACH(device, &g_vbdev_devs, link) {
1777 			if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) {
1778 				found = true;
1779 				break;
1780 			}
1781 		}
1782 		if (found == false) {
1783 			SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n");
1784 			rc = -EINVAL;
1785 			goto error_cant_find_devid;
1786 		}
1787 
1788 		/* Get sessions. */
1789 		vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp);
1790 		if (NULL == vbdev->session_encrypt) {
1791 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1792 			rc = -EINVAL;
1793 			goto error_session_en_create;
1794 		}
1795 
1796 		vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp);
1797 		if (NULL == vbdev->session_decrypt) {
1798 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1799 			rc = -EINVAL;
1800 			goto error_session_de_create;
1801 		}
1802 
1803 		/* Init our per vbdev xform with the desired cipher options. */
1804 		vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
1805 		vbdev->cipher_xform.cipher.key.data = vbdev->key;
1806 		vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
1807 		vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
1808 		vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
1809 		vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
1810 
1811 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
1812 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
1813 						    &vbdev->cipher_xform,
1814 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1815 		if (rc < 0) {
1816 			SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
1817 			rc = -EINVAL;
1818 			goto error_session_init;
1819 		}
1820 
1821 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
1822 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
1823 						    &vbdev->cipher_xform,
1824 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1825 		if (rc < 0) {
1826 			SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
1827 			rc = -EINVAL;
1828 			goto error_session_init;
1829 		}
1830 
1831 		rc = spdk_bdev_register(&vbdev->crypto_bdev);
1832 		if (rc < 0) {
1833 			SPDK_ERRLOG("ERROR trying to register bdev\n");
1834 			rc = -EINVAL;
1835 			goto error_bdev_register;
1836 		}
1837 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "registered io_device and virtual bdev for: %s\n",
1838 			      name->vbdev_name);
1839 		break;
1840 	}
1841 
1842 	return rc;
1843 
1844 	/* Error cleanup paths. */
1845 error_bdev_register:
1846 error_session_init:
1847 	rte_cryptodev_sym_session_free(vbdev->session_decrypt);
1848 error_session_de_create:
1849 	rte_cryptodev_sym_session_free(vbdev->session_encrypt);
1850 error_session_en_create:
1851 error_cant_find_devid:
1852 error_claim:
1853 	spdk_bdev_close(vbdev->base_desc);
1854 error_open:
1855 	TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
1856 	spdk_io_device_unregister(vbdev, NULL);
1857 	free(vbdev->drv_name);
1858 error_drv_name:
1859 	free(vbdev->key);
1860 error_alloc_key:
1861 	free(vbdev->crypto_bdev.name);
1862 error_bdev_name:
1863 	free(vbdev);
1864 error_vbdev_alloc:
1865 	g_number_of_claimed_volumes--;
1866 	return rc;
1867 }
1868 
1869 /* RPC entry for deleting a crypto vbdev. */
1870 void
1871 delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
1872 		   void *cb_arg)
1873 {
1874 	struct bdev_names *name;
1875 
1876 	if (!bdev || bdev->module != &crypto_if) {
1877 		cb_fn(cb_arg, -ENODEV);
1878 		return;
1879 	}
1880 
1881 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
1882 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
1883 	 * unless the underlying bdev was hot-removed.
1884 	 */
1885 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1886 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
1887 			TAILQ_REMOVE(&g_bdev_names, name, link);
1888 			free(name->bdev_name);
1889 			free(name->vbdev_name);
1890 			free(name->drv_name);
1891 			free(name->key);
1892 			free(name);
1893 			break;
1894 		}
1895 	}
1896 
1897 	/* Additional cleanup happens in the destruct callback. */
1898 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
1899 }
1900 
1901 /* Because we specified this function in our crypto bdev function table when we
1902  * registered our crypto bdev, we'll get this call anytime a new bdev shows up.
1903  * Here we need to decide if we care about it and if so what to do. We
1904  * parsed the config file at init so we check the new bdev against the list
1905  * we built up at that time and if the user configured us to attach to this
1906  * bdev, here's where we do it.
1907  */
1908 static void
1909 vbdev_crypto_examine(struct spdk_bdev *bdev)
1910 {
1911 	vbdev_crypto_claim(bdev);
1912 	spdk_bdev_module_examine_done(&crypto_if);
1913 }
1914 
/* Registers the "vbdev_crypto" log component for SPDK_LOG_CRYPTO, the flag
 * passed to the SPDK_DEBUGLOG() calls in this file.
 */
SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_CRYPTO)
1916