xref: /spdk/module/bdev/crypto/vbdev_crypto.c (revision 0ed85362c8132a2d1927757fbcade66b6660d26a)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vbdev_crypto.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/conf.h"
38 #include "spdk/endian.h"
39 #include "spdk/thread.h"
40 #include "spdk/bdev_module.h"
41 #include "spdk_internal/log.h"
42 
43 #include <rte_config.h>
44 #include <rte_version.h>
45 #include <rte_bus_vdev.h>
46 #include <rte_crypto.h>
47 #include <rte_cryptodev.h>
48 #include <rte_cryptodev_pmd.h>
49 
50 /* To add support for new device types, follow the examples of the following...
51  * Note that the string names are defined by the DPDK PMD in question so be
52  * sure to use the exact names.
 * MAX_NUM_DRV_TYPES is the number of entries in g_driver_names[] below.
53  */
54 #define MAX_NUM_DRV_TYPES 2
55 
56 /* The VF spread is the number of queue pairs between virtual functions, we use this to
57  * load balance the QAT device.
58  */
59 #define QAT_VF_SPREAD 32
/* Running count of QAT device/qp nodes; create_vbdev_dev() increments it once for
 * every queue pair it adds to g_device_qp_qat.
 */
60 static uint8_t g_qat_total_qp = 0;
/* NOTE(review): not referenced in this part of the file; presumably the index of the
 * next QAT queue pair to hand out -- confirm against the channel setup code.
 */
61 static uint8_t g_next_qat_index;
62 
/* Driver names of the supported PMDs. AESNI_MB and QAT are string macros defined
 * outside this part of the file; create_vbdev_dev() compares the same macros against
 * cdev_info.driver_name.
 */
63 const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
64 
65 /* Global list of available crypto devices. One entry is allocated per cryptodev by
 * create_vbdev_dev() and appended to g_vbdev_devs once the device has been
 * configured and started.
 */
66 struct vbdev_dev {
67 	struct rte_cryptodev_info	cdev_info;	/* includes device friendly name; filled by rte_cryptodev_info_get() */
68 	uint8_t				cdev_id;	/* identifier for the device (the DPDK cryptodev index) */
69 	TAILQ_ENTRY(vbdev_dev)		link;		/* linkage on g_vbdev_devs */
70 };
71 static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
72 
73 /* Global list and lock for unique device/queue pair combos. We keep 1 list per supported PMD
74  * so that we can optimize per PMD where it makes sense. For example, with QAT there is an optimal
75  * pattern for assigning queue pairs whereas with AESNI there is not.
 *
 * Nodes are created by create_vbdev_dev(), one per queue pair of each device, with
 * in_use set to false.
76  */
77 struct device_qp {
78 	struct vbdev_dev		*device;	/* ptr to crypto device */
79 	uint8_t				qp;		/* queue pair for this node */
80 	bool				in_use;		/* whether this node is in use or not */
81 	uint8_t				index;		/* used by QAT to load balance placement of qpairs; assigned sequentially in vbdev_crypto_init_crypto_drivers() */
82 	TAILQ_ENTRY(device_qp)		link;		/* linkage on one of the per PMD lists below */
83 };
84 static TAILQ_HEAD(, device_qp) g_device_qp_qat = TAILQ_HEAD_INITIALIZER(g_device_qp_qat);
85 static TAILQ_HEAD(, device_qp) g_device_qp_aesni_mb = TAILQ_HEAD_INITIALIZER(g_device_qp_aesni_mb);
/* NOTE(review): the lock is not taken anywhere in this part of the file; presumably it
 * guards the two lists when queue pairs are assigned to channels -- confirm.
 */
86 static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
87 
88 
89 /* In order to limit the number of resources we need to do one crypto
90  * operation per LBA (we use LBA as IV), we tell the bdev layer that
91  * our max IO size is something reasonable. Units here are in bytes.
 * _crypto_operation() asserts that num_blocks * blocklen of an IO does not exceed it.
92  */
93 #define CRYPTO_MAX_IO		(64 * 1024)
94 
95 /* This controls how many ops will be dequeued from the crypto driver in one run
96  * of the poller. It is mainly a performance knob as it effectively determines how
97  * much work the poller has to do.  However even that can vary between crypto drivers
98  * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the
99  * QAT driver just dequeues what has been completed already.
 * It sizes the on-stack dequeue array in crypto_dev_poller().
100  */
101 #define MAX_DEQUEUE_BURST_SIZE	64
102 
103 /* When enqueueing, we need to supply the crypto driver with an array of pointers to
104  * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO
105  * value in conjunction with the other defines to make sure we're not using crazy amounts
106  * of memory. All of these numbers can and probably should be adjusted based on the
107  * workload. By default we'll use the worst case (smallest) block size for the
108  * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B
109  * blocks would give us an enqueue array size of 128.
 * It sizes the on-stack crypto op and mbuf arrays in _crypto_operation().
110  */
111 #define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)
112 
113 /* The number of MBUFS we need must be a power of two and to support other small IOs
114  * in addition to the limits mentioned above, we go to the next power of two. It is
115  * big number because it is one mempool for source and destination mbufs. It may
116  * need to be bigger to support multiple crypto drivers at once.
 *
 * In vbdev_crypto_init_crypto_drivers() NUM_MBUFS is the element count of both the
 * mbuf pool and the crypto op pool, POOL_CACHE_SIZE is the cache size of the crypto
 * op pool, and NUM_SESSIONS / SESS_MEMPOOL_CACHE_SIZE size the session pools.
 * NOTE(review): the factor 2 in NUM_SESSIONS presumably covers one encrypt and one
 * decrypt session per volume (see struct vbdev_crypto) -- confirm.
 * NOTE(review): g_number_of_claimed_volumes is not referenced in this part of the
 * file; presumably it is checked against MAX_CRYPTO_VOLUMES when claiming -- confirm.
117  */
118 #define NUM_MBUFS		32768
119 #define POOL_CACHE_SIZE		256
120 #define MAX_CRYPTO_VOLUMES	128
121 #define NUM_SESSIONS		(2 * MAX_CRYPTO_VOLUMES)
122 #define SESS_MEMPOOL_CACHE_SIZE 0
123 uint8_t g_number_of_claimed_volumes = 0;
124 
125 /* This is the max number of IOs we can supply to any crypto device QP at one time.
126  * It can vary between drivers.
 * It is passed as nb_descriptors when create_vbdev_dev() sets up each queue pair.
127  */
128 #define CRYPTO_QP_DESCRIPTORS	2048
129 
130 /* Specific to AES_CBC. */
131 #define AES_CBC_IV_LENGTH	16
132 #define AES_CBC_KEY_LENGTH	16
133 #define AES_XTS_KEY_LENGTH	16	/* XTS uses 2 keys, each of this size. */
134 #define AESNI_MB_NUM_QP		64	/* passed as max_nb_queue_pairs when the AESNI_MB vdev is created */
135 
136 /* Common for supported devices.
 * IV_OFFSET is the byte offset inside a crypto op at which _crypto_operation() writes
 * the per-op IV; QUEUED_OP_OFFSET is the offset immediately after that IV area.
 * NOTE(review): the op pool reserves AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH bytes of
 * private data, so QUEUED_OP_OFFSET is presumably where struct vbdev_crypto_op is
 * stored -- confirm against the enqueue code.
 */
137 #define IV_OFFSET            (sizeof(struct rte_crypto_op) + \
138 				sizeof(struct rte_crypto_sym_op))
139 #define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH)
140 
/* Forward declarations of functions whose definitions are not in this part of the file.
 * _complete_internal_write is the completion callback handed to
 * spdk_bdev_writev_blocks() in _crypto_operation_complete().
 */
141 static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
142 static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
143 static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
144 static void vbdev_crypto_examine(struct spdk_bdev *bdev);
145 static int vbdev_crypto_claim(struct spdk_bdev *bdev);
146 static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
147 
148 /* List of crypto_bdev names and their base bdevs via configuration file.
 * Each entry records one requested vbdev: its name, the base bdev it sits on, the
 * crypto driver and cipher to use, and the key material.
 */
149 struct bdev_names {
150 	char			*vbdev_name;	/* name of the vbdev to create */
151 	char			*bdev_name;	/* base bdev name */
152 
153 	/* Note, for dev/test we allow use of key in the config file, for production
154 	 * use, you must use an RPC to specify the key for security reasons.
155 	 */
156 	uint8_t			*key;		/* key per bdev */
157 	char			*drv_name;	/* name of the crypto device driver */
158 	char			*cipher;	/* AES_CBC or AES_XTS */
159 	uint8_t			*key2;		/* key #2 for AES_XTS, per bdev */
160 	TAILQ_ENTRY(bdev_names)	link;		/* linkage on g_bdev_names */
161 };
162 static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
163 
164 /* List of virtual bdevs and associated info for each. We keep the device friendly name here even
165  * though it's also in the device struct because we use it early on.
 *
 * crypto_bdev is embedded by value, so code holding a pointer to it can recover the
 * enclosing vbdev_crypto with SPDK_CONTAINEROF (see _crypto_operation_complete()).
166  */
167 struct vbdev_crypto {
168 	struct spdk_bdev		*base_bdev;		/* the thing we're attaching to */
169 	struct spdk_bdev_desc		*base_desc;		/* its descriptor we get from open; used to write encrypted data to the base bdev */
170 	struct spdk_bdev		crypto_bdev;		/* the crypto virtual bdev */
171 	uint8_t				*key;			/* key per bdev */
172 	uint8_t				*key2;			/* for XTS */
173 	uint8_t				*xts_key;		/* key + key 2 */
174 	char				*drv_name;		/* name of the crypto device driver */
175 	char				*cipher;		/* cipher used */
176 	struct rte_cryptodev_sym_session *session_encrypt;	/* encryption session for this bdev */
177 	struct rte_cryptodev_sym_session *session_decrypt;	/* decryption session for this bdev */
178 	struct rte_crypto_sym_xform	cipher_xform;		/* crypto control struct for this bdev */
179 	TAILQ_ENTRY(vbdev_crypto)	link;			/* linkage on g_vbdev_crypto */
180 	struct spdk_thread		*thread;		/* thread where base device is opened */
181 };
182 static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
183 
184 /* Shared mempools between all devices on this system. All four are created in
 * vbdev_crypto_init_crypto_drivers(); a non-NULL g_session_mp is also what that
 * function uses to detect that initialization has already been done.
 * g_session_mp_priv is only created when building against DPDK 19.02 or newer.
 */
185 static struct rte_mempool *g_session_mp = NULL;
186 static struct rte_mempool *g_session_mp_priv = NULL;
187 static struct spdk_mempool *g_mbuf_mp = NULL;		/* mbuf mempool */
188 static struct rte_mempool *g_crypto_op_mp = NULL;	/* crypto operations, must be rte* mempool */
189 
190 /* For queueing up crypto operations that we can't submit for some reason.
 * Entries sit on crypto_io_channel.queued_cry_ops and are re-enqueued one at a time
 * by crypto_dev_poller() using the cdev_id/qp recorded here.
 */
191 struct vbdev_crypto_op {
192 	uint8_t					cdev_id;	/* crypto device to enqueue to */
193 	uint8_t					qp;		/* queue pair on that device */
194 	struct rte_crypto_op			*crypto_op;	/* the op that still has to be enqueued */
195 	struct spdk_bdev_io			*bdev_io;	/* the bdev_io this op belongs to */
196 	TAILQ_ENTRY(vbdev_crypto_op)		link;		/* linkage on queued_cry_ops */
197 };
/* Size reserved for one vbdev_crypto_op in the private area of every crypto op
 * (see the op pool creation in vbdev_crypto_init_crypto_drivers()).
 */
198 #define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op))
199 
200 /* The crypto vbdev channel struct. It is allocated and freed on my behalf by the io channel code.
201  * We store things in here that are needed on per thread basis like the base_channel for this thread,
202  * and the poller for this thread.
203  */
204 struct crypto_io_channel {
205 	struct spdk_io_channel		*base_ch;		/* IO channel of base device */
206 	struct spdk_poller		*poller;		/* completion poller */
207 	struct device_qp		*device_qp;		/* unique device/qp combination for this channel */
208 	TAILQ_HEAD(, spdk_bdev_io)	pending_cry_ios;	/* outstanding operations to the crypto device */
209 	struct spdk_io_channel_iter	*iter;			/* used with for_each_channel in reset; while non-NULL the poller fails IOs as they complete */
210 	TAILQ_HEAD(, vbdev_crypto_op)	queued_cry_ops;		/* queued for re-submission to CryptoDev */
211 };
212 
213 /* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
214  * each IO for us.
 * It is reached through bdev_io->driver_ctx.
215  */
216 struct crypto_bdev_io {
217 	int cryop_cnt_remaining;			/* counter used when completing crypto ops; set in _crypto_operation(), decremented per dequeued op in the poller */
218 	struct crypto_io_channel *crypto_ch;		/* need to store for crypto completion handling */
219 	struct vbdev_crypto *crypto_bdev;		/* the crypto node struct associated with this IO */
220 	struct spdk_bdev_io *orig_io;			/* the original IO */
221 	struct spdk_bdev_io *read_io;			/* the read IO we issued; freed in _crypto_operation_complete() */
222 	int8_t bdev_io_status;				/* the status we'll report back on the bdev IO */
223 	bool on_pending_list;				/* true once this IO has been put on crypto_ch->pending_cry_ios */
224 	/* Used for the single contiguous buffer that serves as the crypto destination target for writes */
225 	uint64_t aux_num_blocks;			/* num of blocks for the contiguous buffer */
226 	uint64_t aux_offset_blocks;			/* block offset on media */
227 	void *aux_buf_raw;				/* raw buffer that the bdev layer gave us for write buffer */
228 	struct iovec aux_buf_iov;			/* iov representing aligned contig write buffer */
229 
230 	/* for bdev_io_wait */
231 	struct spdk_bdev_io_wait_entry bdev_io_wait;
232 	struct spdk_io_channel *ch;
233 };
234 
235 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */
236 static int
237 create_vbdev_dev(uint8_t index, uint16_t num_lcores)
238 {
239 	struct vbdev_dev *device;
240 	uint8_t j, cdev_id, cdrv_id;
241 	struct device_qp *dev_qp;
242 	struct device_qp *tmp_qp;
243 	int rc;
244 	TAILQ_HEAD(device_qps, device_qp) *dev_qp_head;
245 
246 	device = calloc(1, sizeof(struct vbdev_dev));
247 	if (!device) {
248 		return -ENOMEM;
249 	}
250 
251 	/* Get details about this device. */
252 	rte_cryptodev_info_get(index, &device->cdev_info);
253 	cdrv_id = device->cdev_info.driver_id;
254 	cdev_id = device->cdev_id = index;
255 
256 	/* Before going any further, make sure we have enough resources for this
257 	 * device type to function.  We need a unique queue pair per core accross each
258 	 * device type to remain lockless....
259 	 */
260 	if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
261 	     device->cdev_info.max_nb_queue_pairs) < num_lcores) {
262 		SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
263 			    device->cdev_info.driver_name);
264 		SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
265 		rc = -EINVAL;
266 		goto err;
267 	}
268 
269 	/* Setup queue pairs. */
270 	struct rte_cryptodev_config conf = {
271 		.nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
272 		.socket_id = SPDK_ENV_SOCKET_ID_ANY
273 	};
274 
275 	rc = rte_cryptodev_configure(cdev_id, &conf);
276 	if (rc < 0) {
277 		SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id);
278 		rc = -EINVAL;
279 		goto err;
280 	}
281 
282 	struct rte_cryptodev_qp_conf qp_conf = {
283 		.nb_descriptors = CRYPTO_QP_DESCRIPTORS,
284 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
285 		.mp_session = g_session_mp,
286 		.mp_session_private = g_session_mp_priv,
287 #endif
288 	};
289 
290 	/* Pre-setup all potential qpairs now and assign them in the channel
291 	 * callback. If we were to create them there, we'd have to stop the
292 	 * entire device affecting all other threads that might be using it
293 	 * even on other queue pairs.
294 	 */
295 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
296 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
297 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY);
298 #else
299 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY,
300 						    g_session_mp);
301 #endif
302 
303 		if (rc < 0) {
304 			SPDK_ERRLOG("Failed to setup queue pair %u on "
305 				    "cryptodev %u\n", j, cdev_id);
306 			rc = -EINVAL;
307 			goto err;
308 		}
309 	}
310 
311 	rc = rte_cryptodev_start(cdev_id);
312 	if (rc < 0) {
313 		SPDK_ERRLOG("Failed to start device %u: error %d\n",
314 			    cdev_id, rc);
315 		rc = -EINVAL;
316 		goto err;
317 	}
318 
319 	/* Select the right device/qp list based on driver name
320 	 * or error if it does not exist.
321 	 */
322 	if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
323 		dev_qp_head = (struct device_qps *)&g_device_qp_qat;
324 	} else if (strcmp(device->cdev_info.driver_name, AESNI_MB) == 0) {
325 		dev_qp_head = (struct device_qps *)&g_device_qp_aesni_mb;
326 	} else {
327 		rc = -EINVAL;
328 		goto err;
329 	}
330 
331 	/* Build up lists of device/qp combinations per PMD */
332 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
333 		dev_qp = calloc(1, sizeof(struct device_qp));
334 		if (!dev_qp) {
335 			rc = -ENOMEM;
336 			goto err_qp_alloc;
337 		}
338 		dev_qp->device = device;
339 		dev_qp->qp = j;
340 		dev_qp->in_use = false;
341 		if (strcmp(device->cdev_info.driver_name, QAT) == 0) {
342 			g_qat_total_qp++;
343 		}
344 		TAILQ_INSERT_TAIL(dev_qp_head, dev_qp, link);
345 	}
346 
347 	/* Add to our list of available crypto devices. */
348 	TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
349 
350 	return 0;
351 err_qp_alloc:
352 	TAILQ_FOREACH_SAFE(dev_qp, dev_qp_head, link, tmp_qp) {
353 		TAILQ_REMOVE(dev_qp_head, dev_qp, link);
354 		free(dev_qp);
355 	}
356 err:
357 	free(device);
358 
359 	return rc;
360 }
361 
362 /* This is called from the module's init function. We setup all crypto devices early on as we are unable
363  * to easily dynamically configure queue pairs after the drivers are up and running.  So, here, we
364  * configure the max capabilities of each device and assign threads to queue pairs as channels are
365  * requested.
366  */
367 static int
368 vbdev_crypto_init_crypto_drivers(void)
369 {
370 	uint8_t cdev_count;
371 	uint8_t cdev_id;
372 	int i, rc = 0;
373 	struct vbdev_dev *device;
374 	struct vbdev_dev *tmp_dev;
375 	struct device_qp *dev_qp;
376 	unsigned int max_sess_size = 0, sess_size;
377 	uint16_t num_lcores = rte_lcore_count();
378 	char aesni_args[32];
379 
380 	/* Only the first call, via RPC or module init should init the crypto drivers. */
381 	if (g_session_mp != NULL) {
382 		return 0;
383 	}
384 
385 	/* We always init AESNI_MB */
386 	snprintf(aesni_args, sizeof(aesni_args), "max_nb_queue_pairs=%d", AESNI_MB_NUM_QP);
387 	rc = rte_vdev_init(AESNI_MB, aesni_args);
388 	if (rc) {
389 		SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
390 		return -EINVAL;
391 	}
392 
393 	/* If we have no crypto devices, there's no reason to continue. */
394 	cdev_count = rte_cryptodev_count();
395 	if (cdev_count == 0) {
396 		return 0;
397 	}
398 
399 	/*
400 	 * Create global mempools, shared by all devices regardless of type.
401 	 */
402 
403 	/* First determine max session size, most pools are shared by all the devices,
404 	 * so we need to find the global max sessions size.
405 	 */
406 	for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
407 		sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
408 		if (sess_size > max_sess_size) {
409 			max_sess_size = sess_size;
410 		}
411 	}
412 
413 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
414 	g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size,
415 					       SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL,
416 					       NULL, SOCKET_ID_ANY, 0);
417 	if (g_session_mp_priv == NULL) {
418 		SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size);
419 		return -ENOMEM;
420 	}
421 
422 	g_session_mp = rte_cryptodev_sym_session_pool_create(
423 			       "session_mp",
424 			       NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0,
425 			       SOCKET_ID_ANY);
426 #else
427 	g_session_mp = rte_mempool_create("session_mp", NUM_SESSIONS, max_sess_size,
428 					  SESS_MEMPOOL_CACHE_SIZE,
429 					  0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
430 #endif
431 	if (g_session_mp == NULL) {
432 		SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
433 		goto error_create_session_mp;
434 		return -ENOMEM;
435 	}
436 
437 	g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
438 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
439 					SPDK_ENV_SOCKET_ID_ANY);
440 	if (g_mbuf_mp == NULL) {
441 		SPDK_ERRLOG("Cannot create mbuf pool\n");
442 		rc = -ENOMEM;
443 		goto error_create_mbuf;
444 	}
445 
446 	/* We use per op private data to store the IV and our own struct
447 	 * for queueing ops.
448 	 */
449 	g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
450 			 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
451 			 NUM_MBUFS,
452 			 POOL_CACHE_SIZE,
453 			 AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH,
454 			 rte_socket_id());
455 
456 	if (g_crypto_op_mp == NULL) {
457 		SPDK_ERRLOG("Cannot create op pool\n");
458 		rc = -ENOMEM;
459 		goto error_create_op;
460 	}
461 
462 	/* Init all devices */
463 	for (i = 0; i < cdev_count; i++) {
464 		rc = create_vbdev_dev(i, num_lcores);
465 		if (rc) {
466 			goto err;
467 		}
468 	}
469 
470 	/* Assign index values to the QAT device qp nodes so that we can
471 	 * assign them for optimal performance.
472 	 */
473 	i = 0;
474 	TAILQ_FOREACH(dev_qp, &g_device_qp_qat, link) {
475 		dev_qp->index = i++;
476 	}
477 
478 	return 0;
479 
480 	/* Error cleanup paths. */
481 err:
482 	TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) {
483 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
484 		free(device);
485 	}
486 	rte_mempool_free(g_crypto_op_mp);
487 	g_crypto_op_mp = NULL;
488 error_create_op:
489 	spdk_mempool_free(g_mbuf_mp);
490 	g_mbuf_mp = NULL;
491 error_create_mbuf:
492 	rte_mempool_free(g_session_mp);
493 	g_session_mp = NULL;
494 error_create_session_mp:
495 	if (g_session_mp_priv != NULL) {
496 		rte_mempool_free(g_session_mp_priv);
497 		g_session_mp_priv = NULL;
498 	}
499 	return rc;
500 }
501 
502 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish
503  * the read on decrypted data. Do that here.
504  */
505 static void
506 _crypto_operation_complete(struct spdk_bdev_io *bdev_io)
507 {
508 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
509 					   crypto_bdev);
510 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
511 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
512 	struct spdk_bdev_io *free_me = io_ctx->read_io;
513 	int rc = 0;
514 
515 	TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link);
516 
517 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
518 
519 		/* Complete the original IO and then free the one that we created
520 		 * as a result of issuing an IO via submit_request.
521 		 */
522 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
523 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
524 		} else {
525 			SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io);
526 			rc = -EINVAL;
527 		}
528 		spdk_bdev_free_io(free_me);
529 
530 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
531 
532 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
533 			/* Write the encrypted data. */
534 			rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
535 						     &io_ctx->aux_buf_iov, 1, io_ctx->aux_offset_blocks,
536 						     io_ctx->aux_num_blocks, _complete_internal_write,
537 						     bdev_io);
538 		} else {
539 			SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io);
540 			rc = -EINVAL;
541 		}
542 
543 	} else {
544 		SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n",
545 			    bdev_io->type);
546 		rc = -EINVAL;
547 	}
548 
549 	if (rc) {
550 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
551 	}
552 }
553 
/* Forward declaration; the definition follows crypto_dev_poller(). crypto_op selects
 * encrypt or decrypt, aux_buf is the buffer used as the encryption destination.
 */
554 static int _crypto_operation(struct spdk_bdev_io *bdev_io,
555 			     enum rte_crypto_cipher_operation crypto_op,
556 			     void *aux_buf);
557 
558 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
559  * the device. Then we need to decide if what we've got so far (including previous poller
560  * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
561  * accordingly. This means either completing a read or issuing a new write.
562  */
563 static int
564 crypto_dev_poller(void *args)
565 {
566 	struct crypto_io_channel *crypto_ch = args;
567 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
568 	int i, num_dequeued_ops, num_enqueued_ops;
569 	struct spdk_bdev_io *bdev_io = NULL;
570 	struct crypto_bdev_io *io_ctx = NULL;
571 	struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
572 	struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
573 	int num_mbufs = 0;
574 	struct vbdev_crypto_op *op_to_resubmit;
575 
576 	/* Each run of the poller will get just what the device has available
577 	 * at the moment we call it, we don't check again after draining the
578 	 * first batch.
579 	 */
580 	num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
581 			   dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
582 
583 	/* Check if operation was processed successfully */
584 	for (i = 0; i < num_dequeued_ops; i++) {
585 
586 		/* We don't know the order or association of the crypto ops wrt any
587 		 * partiular bdev_io so need to look at each and determine if it's
588 		 * the last one for it's bdev_io or not.
589 		 */
590 		bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata;
591 		assert(bdev_io != NULL);
592 		io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
593 
594 		if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
595 			SPDK_ERRLOG("error with op %d status %u\n", i,
596 				    dequeued_ops[i]->status);
597 			/* Update the bdev status to error, we'll still process the
598 			 * rest of the crypto ops for this bdev_io though so they
599 			 * aren't left hanging.
600 			 */
601 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
602 		}
603 
604 		assert(io_ctx->cryop_cnt_remaining > 0);
605 
606 		/* Return the associated src and dst mbufs by collecting them into
607 		 * an array that we can use the bulk API to free after the loop.
608 		 */
609 		dequeued_ops[i]->sym->m_src->userdata = NULL;
610 		mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
611 		if (dequeued_ops[i]->sym->m_dst) {
612 			mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
613 		}
614 
615 		/* done encrypting, complete the bdev_io */
616 		if (--io_ctx->cryop_cnt_remaining == 0) {
617 
618 			/* If we're completing this with an outstanding reset we need
619 			 * to fail it.
620 			 */
621 			if (crypto_ch->iter) {
622 				io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
623 			}
624 
625 			/* Complete the IO */
626 			_crypto_operation_complete(bdev_io);
627 		}
628 	}
629 
630 	/* Now bulk free both mbufs and crypto operations. */
631 	if (num_dequeued_ops > 0) {
632 		rte_mempool_put_bulk(g_crypto_op_mp,
633 				     (void **)dequeued_ops,
634 				     num_dequeued_ops);
635 		assert(num_mbufs > 0);
636 		spdk_mempool_put_bulk(g_mbuf_mp,
637 				      (void **)mbufs_to_free,
638 				      num_mbufs);
639 	}
640 
641 	/* Check if there are any pending crypto ops to process */
642 	while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) {
643 		op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops);
644 		io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx;
645 		num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id,
646 				   op_to_resubmit->qp,
647 				   &op_to_resubmit->crypto_op,
648 				   1);
649 		if (num_enqueued_ops == 1) {
650 			/* Make sure we don't put this on twice as one bdev_io is made up
651 			 * of many crypto ops.
652 			 */
653 			if (io_ctx->on_pending_list == false) {
654 				TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link);
655 				io_ctx->on_pending_list = true;
656 			}
657 			TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link);
658 		} else {
659 			/* if we couldn't get one, just break and try again later. */
660 			break;
661 		}
662 	}
663 
664 	/* If the channel iter is not NULL, we need to continue to poll
665 	 * until the pending list is empty, then we can move on to the
666 	 * next channel.
667 	 */
668 	if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) {
669 		SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch);
670 		spdk_for_each_channel_continue(crypto_ch->iter, 0);
671 		crypto_ch->iter = NULL;
672 	}
673 
674 	return num_dequeued_ops;
675 }
676 
677 /* We're either encrypting on the way down or decrypting on the way back. */
678 static int
679 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op,
680 		  void *aux_buf)
681 {
682 	uint16_t num_enqueued_ops = 0;
683 	uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
684 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
685 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
686 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
687 	uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
688 	uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
689 	int rc;
690 	uint32_t iov_index = 0;
691 	uint32_t allocated = 0;
692 	uint8_t *current_iov = NULL;
693 	uint64_t total_remaining = 0;
694 	uint64_t updated_length, current_iov_remaining = 0;
695 	uint32_t crypto_index = 0;
696 	uint32_t en_offset = 0;
697 	struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
698 	struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
699 	struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
700 	int burst;
701 	struct vbdev_crypto_op *op_to_queue;
702 	uint64_t alignment = spdk_bdev_get_buf_align(&io_ctx->crypto_bdev->crypto_bdev);
703 
704 	assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
705 
706 	/* Get the number of source mbufs that we need. These will always be 1:1 because we
707 	 * don't support chaining. The reason we don't is because of our decision to use
708 	 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the
709 	 * op would be > 1 LBA.
710 	 */
711 	rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
712 	if (rc) {
713 		SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
714 		return -ENOMEM;
715 	}
716 
717 	/* Get the same amount but these buffers to describe the encrypted data location (dst). */
718 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
719 		rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
720 		if (rc) {
721 			SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
722 			rc = -ENOMEM;
723 			goto error_get_dst;
724 		}
725 	}
726 
727 #ifdef __clang_analyzer__
728 	/* silence scan-build false positive */
729 	SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000);
730 #endif
731 	/* Allocate crypto operations. */
732 	allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
733 					     RTE_CRYPTO_OP_TYPE_SYMMETRIC,
734 					     crypto_ops, cryop_cnt);
735 	if (allocated < cryop_cnt) {
736 		SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
737 		rc = -ENOMEM;
738 		goto error_get_ops;
739 	}
740 
741 	/* For encryption, we need to prepare a single contiguous buffer as the encryption
742 	 * destination, we'll then pass that along for the write after encryption is done.
743 	 * This is done to avoiding encrypting the provided write buffer which may be
744 	 * undesirable in some use cases.
745 	 */
746 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
747 		io_ctx->aux_buf_iov.iov_len = total_length;
748 		io_ctx->aux_buf_raw = aux_buf;
749 		io_ctx->aux_buf_iov.iov_base  = (void *)(((uintptr_t)aux_buf + (alignment - 1)) & ~(alignment - 1));
750 		io_ctx->aux_offset_blocks = bdev_io->u.bdev.offset_blocks;
751 		io_ctx->aux_num_blocks = bdev_io->u.bdev.num_blocks;
752 	}
753 
754 	/* This value is used in the completion callback to determine when the bdev_io is
755 	 * complete.
756 	 */
757 	io_ctx->cryop_cnt_remaining = cryop_cnt;
758 
759 	/* As we don't support chaining because of a decision to use LBA as IV, construction
760 	 * of crypto operations is straightforward. We build both the op, the mbuf and the
761 	 * dst_mbuf in our local arrays by looping through the length of the bdev IO and
762 	 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each
763 	 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single
764 	 * mbuf per crypto operation.
765 	 */
766 	total_remaining = total_length;
767 	current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
768 	current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
769 	do {
770 		uint8_t *iv_ptr;
771 		uint64_t op_block_offset;
772 
773 		/* Set the mbuf elements address and length. Null out the next pointer. */
774 		src_mbufs[crypto_index]->buf_addr = current_iov;
775 		src_mbufs[crypto_index]->data_len = updated_length = crypto_len;
776 		/* TODO: Make this assignment conditional on QAT usage and add an assert. */
777 		src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length);
778 		src_mbufs[crypto_index]->next = NULL;
779 		/* Store context in every mbuf as we don't know anything about completion order */
780 		src_mbufs[crypto_index]->userdata = bdev_io;
781 
782 		/* Set the IV - we use the LBA of the crypto_op */
783 		iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
784 						   IV_OFFSET);
785 		memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
786 		op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
787 		rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
788 
789 		/* Set the data to encrypt/decrypt length */
790 		crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
791 		crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
792 
793 		/* link the mbuf to the crypto op. */
794 		crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
795 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
796 			crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
797 		} else {
798 			crypto_ops[crypto_index]->sym->m_dst = NULL;
799 		}
800 
801 		/* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io
802 		 * that will be used to process the write on completion to the same buffer. Setting
803 		 * up the en_buffer is a little simpler as we know the destination buffer is single IOV.
804 		 */
805 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
806 
807 			/* Set the relevant destination en_mbuf elements. */
808 			dst_mbufs[crypto_index]->buf_addr = io_ctx->aux_buf_iov.iov_base + en_offset;
809 			dst_mbufs[crypto_index]->data_len = updated_length = crypto_len;
810 			/* TODO: Make this assignment conditional on QAT usage and add an assert. */
811 			dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr,
812 							    &updated_length);
813 			crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
814 			en_offset += crypto_len;
815 			dst_mbufs[crypto_index]->next = NULL;
816 
817 			/* Attach the crypto session to the operation */
818 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
819 							      io_ctx->crypto_bdev->session_encrypt);
820 			if (rc) {
821 				rc = -EINVAL;
822 				goto error_attach_session;
823 			}
824 
825 		} else {
826 			/* Attach the crypto session to the operation */
827 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
828 							      io_ctx->crypto_bdev->session_decrypt);
829 			if (rc) {
830 				rc = -EINVAL;
831 				goto error_attach_session;
832 			}
833 
834 
835 		}
836 
837 		/* Subtract our running totals for the op in progress and the overall bdev io */
838 		total_remaining -= crypto_len;
839 		current_iov_remaining -= crypto_len;
840 
841 		/* move our current IOV pointer accordingly. */
842 		current_iov += crypto_len;
843 
844 		/* move on to the next crypto operation */
845 		crypto_index++;
846 
847 		/* If we're done with this IOV, move to the next one. */
848 		if (current_iov_remaining == 0 && total_remaining > 0) {
849 			iov_index++;
850 			current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
851 			current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
852 		}
853 	} while (total_remaining > 0);
854 
855 	/* Enqueue everything we've got but limit by the max number of descriptors we
856 	 * configured the crypto device for.
857 	 */
858 	burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS);
859 	num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
860 			   &crypto_ops[0],
861 			   burst);
862 
863 	/* Add this bdev_io to our outstanding list if any of its crypto ops made it. */
864 	if (num_enqueued_ops > 0) {
865 		TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link);
866 		io_ctx->on_pending_list = true;
867 	}
868 	/* We were unable to enqueue everything but did get some, so need to decide what
869 	 * to do based on the status of the last op.
870 	 */
871 	if (num_enqueued_ops < cryop_cnt) {
872 		switch (crypto_ops[num_enqueued_ops]->status) {
873 		case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED:
874 			/* Queue them up on a linked list to be resubmitted via the poller. */
875 			for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) {
876 				op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index],
877 						uint8_t *, QUEUED_OP_OFFSET);
878 				op_to_queue->cdev_id = cdev_id;
879 				op_to_queue->qp = crypto_ch->device_qp->qp;
880 				op_to_queue->crypto_op = crypto_ops[crypto_index];
881 				op_to_queue->bdev_io = bdev_io;
882 				TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops,
883 						  op_to_queue,
884 						  link);
885 			}
886 			break;
887 		default:
888 			/* For all other statuses, set the io_ctx bdev_io status so that
889 			 * the poller will pick the failure up for the overall bdev status.
890 			 */
891 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
892 			if (num_enqueued_ops == 0) {
893 				/* If nothing was enqueued, but the last one wasn't because of
894 				 * busy, fail it now as the poller won't know anything about it.
895 				 */
896 				_crypto_operation_complete(bdev_io);
897 				rc = -EINVAL;
898 				goto error_attach_session;
899 			}
900 			break;
901 		}
902 	}
903 
904 	return rc;
905 
906 	/* Error cleanup paths. */
907 error_attach_session:
908 error_get_ops:
909 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
910 		spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
911 				      cryop_cnt);
912 	}
913 	if (allocated > 0) {
914 		rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
915 				     allocated);
916 	}
917 error_get_dst:
918 	spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
919 			      cryop_cnt);
920 	return rc;
921 }
922 
923 /* This function is called after all channels have been quiesced following
924  * a bdev reset.
925  */
926 static void
927 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status)
928 {
929 	struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i);
930 
931 	assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios));
932 	assert(io_ctx->orig_io != NULL);
933 
934 	spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
935 }
936 
937 /* This function is called per channel to quiesce IOs before completing a
938  * bdev reset that we received.
939  */
940 static void
941 _ch_quiesce(struct spdk_io_channel_iter *i)
942 {
943 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
944 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
945 
946 	crypto_ch->iter = i;
947 	/* When the poller runs, it will see the non-NULL iter and handle
948 	 * the quiesce.
949 	 */
950 }
951 
952 /* Completion callback for IO that were issued from this bdev other than read/write.
953  * They have their own for readability.
954  */
955 static void
956 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
957 {
958 	struct spdk_bdev_io *orig_io = cb_arg;
959 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
960 
961 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
962 		struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
963 
964 		assert(orig_io == orig_ctx->orig_io);
965 
966 		spdk_bdev_free_io(bdev_io);
967 
968 		spdk_for_each_channel(orig_ctx->crypto_bdev,
969 				      _ch_quiesce,
970 				      orig_ctx,
971 				      _ch_quiesce_done);
972 		return;
973 	}
974 
975 	spdk_bdev_io_complete(orig_io, status);
976 	spdk_bdev_free_io(bdev_io);
977 }
978 
979 /* Completion callback for writes that were issued from this bdev. */
980 static void
981 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
982 {
983 	struct spdk_bdev_io *orig_io = cb_arg;
984 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
985 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
986 
987 	spdk_bdev_io_put_aux_buf(orig_io, orig_ctx->aux_buf_raw);
988 
989 	spdk_bdev_io_complete(orig_io, status);
990 	spdk_bdev_free_io(bdev_io);
991 }
992 
993 /* Completion callback for reads that were issued from this bdev. */
994 static void
995 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
996 {
997 	struct spdk_bdev_io *orig_io = cb_arg;
998 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
999 
1000 	if (success) {
1001 
1002 		/* Save off this bdev_io so it can be freed after decryption. */
1003 		orig_ctx->read_io = bdev_io;
1004 
1005 		if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT, NULL)) {
1006 			return;
1007 		} else {
1008 			SPDK_ERRLOG("ERROR decrypting\n");
1009 		}
1010 	} else {
1011 		SPDK_ERRLOG("ERROR on read prior to decrypting\n");
1012 	}
1013 
1014 	spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
1015 	spdk_bdev_free_io(bdev_io);
1016 }
1017 
1018 static void
1019 vbdev_crypto_resubmit_io(void *arg)
1020 {
1021 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
1022 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1023 
1024 	vbdev_crypto_submit_request(io_ctx->ch, bdev_io);
1025 }
1026 
1027 static void
1028 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io)
1029 {
1030 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1031 	int rc;
1032 
1033 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
1034 	io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io;
1035 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
1036 
1037 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->crypto_ch->base_ch, &io_ctx->bdev_io_wait);
1038 	if (rc != 0) {
1039 		SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc);
1040 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1041 	}
1042 }
1043 
1044 /* Callback for getting a buf from the bdev pool in the event that the caller passed
1045  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
1046  * beneath us before we're done with it.
1047  */
1048 static void
1049 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1050 		       bool success)
1051 {
1052 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1053 					   crypto_bdev);
1054 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1055 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1056 	int rc;
1057 
1058 	if (!success) {
1059 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1060 		return;
1061 	}
1062 
1063 	rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
1064 				    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
1065 				    bdev_io->u.bdev.num_blocks, _complete_internal_read,
1066 				    bdev_io);
1067 	if (rc != 0) {
1068 		if (rc == -ENOMEM) {
1069 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1070 			io_ctx->ch = ch;
1071 			vbdev_crypto_queue_io(bdev_io);
1072 		} else {
1073 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1074 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1075 		}
1076 	}
1077 }
1078 
1079 /* For encryption we don't want to encrypt the data in place as the host isn't
1080  * expecting us to mangle its data buffers so we need to encrypt into the bdev
1081  * aux buffer, then we can use that as the source for the disk data transfer.
1082  */
1083 static void
1084 crypto_write_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1085 			void *aux_buf)
1086 {
1087 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1088 	int rc = 0;
1089 
1090 	rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT, aux_buf);
1091 	if (rc != 0) {
1092 		spdk_bdev_io_put_aux_buf(bdev_io, aux_buf);
1093 		if (rc == -ENOMEM) {
1094 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1095 			io_ctx->ch = ch;
1096 			vbdev_crypto_queue_io(bdev_io);
1097 		} else {
1098 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1099 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1100 		}
1101 	}
1102 }
1103 
1104 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto,
1105  * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO
1106  * and call our cpl callback provided below along with the original bdev_io so that we can
1107  * complete it once this IO completes. For crypto operations, we'll either encrypt it first
1108  * (writes) then call back into bdev to submit it or we'll submit a read and then catch it
1109  * on the way back for decryption.
1110  */
1111 static void
1112 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
1113 {
1114 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1115 					   crypto_bdev);
1116 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1117 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1118 	int rc = 0;
1119 
1120 	memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
1121 	io_ctx->crypto_bdev = crypto_bdev;
1122 	io_ctx->crypto_ch = crypto_ch;
1123 	io_ctx->orig_io = bdev_io;
1124 	io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1125 
1126 	switch (bdev_io->type) {
1127 	case SPDK_BDEV_IO_TYPE_READ:
1128 		spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
1129 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
1130 		break;
1131 	case SPDK_BDEV_IO_TYPE_WRITE:
1132 		/* Tell the bdev layer that we need an aux buf in addition to the data
1133 		 * buf already associated with the bdev.
1134 		 */
1135 		spdk_bdev_io_get_aux_buf(bdev_io, crypto_write_get_buf_cb);
1136 		break;
1137 	case SPDK_BDEV_IO_TYPE_UNMAP:
1138 		rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1139 					    bdev_io->u.bdev.offset_blocks,
1140 					    bdev_io->u.bdev.num_blocks,
1141 					    _complete_internal_io, bdev_io);
1142 		break;
1143 	case SPDK_BDEV_IO_TYPE_FLUSH:
1144 		rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1145 					    bdev_io->u.bdev.offset_blocks,
1146 					    bdev_io->u.bdev.num_blocks,
1147 					    _complete_internal_io, bdev_io);
1148 		break;
1149 	case SPDK_BDEV_IO_TYPE_RESET:
1150 		rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
1151 				     _complete_internal_io, bdev_io);
1152 		break;
1153 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1154 	default:
1155 		SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
1156 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1157 		return;
1158 	}
1159 
1160 	if (rc != 0) {
1161 		if (rc == -ENOMEM) {
1162 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1163 			io_ctx->ch = ch;
1164 			vbdev_crypto_queue_io(bdev_io);
1165 		} else {
1166 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1167 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1168 		}
1169 	}
1170 }
1171 
1172 /* We'll just call the base bdev and let it answer except for WZ command which
1173  * we always say we don't support so that the bdev layer will actually send us
1174  * real writes that we can encrypt.
1175  */
1176 static bool
1177 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1178 {
1179 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1180 
1181 	switch (io_type) {
1182 	case SPDK_BDEV_IO_TYPE_WRITE:
1183 	case SPDK_BDEV_IO_TYPE_UNMAP:
1184 	case SPDK_BDEV_IO_TYPE_RESET:
1185 	case SPDK_BDEV_IO_TYPE_READ:
1186 	case SPDK_BDEV_IO_TYPE_FLUSH:
1187 		return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
1188 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1189 	/* Force the bdev layer to issue actual writes of zeroes so we can
1190 	 * encrypt them as regular writes.
1191 	 */
1192 	default:
1193 		return false;
1194 	}
1195 }
1196 
1197 /* Callback for unregistering the IO device. */
1198 static void
1199 _device_unregister_cb(void *io_device)
1200 {
1201 	struct vbdev_crypto *crypto_bdev = io_device;
1202 
1203 	/* Done with this crypto_bdev. */
1204 	rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt);
1205 	rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt);
1206 	free(crypto_bdev->drv_name);
1207 	if (crypto_bdev->key) {
1208 		memset(crypto_bdev->key, 0, strnlen(crypto_bdev->key, (AES_CBC_KEY_LENGTH + 1)));
1209 		free(crypto_bdev->key);
1210 	}
1211 	if (crypto_bdev->key2) {
1212 		memset(crypto_bdev->key2, 0, strnlen(crypto_bdev->key2, (AES_XTS_KEY_LENGTH + 1)));
1213 		free(crypto_bdev->key2);
1214 	}
1215 	if (crypto_bdev->xts_key) {
1216 		memset(crypto_bdev->xts_key, 0, strnlen(crypto_bdev->xts_key, (AES_XTS_KEY_LENGTH * 2) + 1));
1217 		free(crypto_bdev->xts_key);
1218 	}
1219 	free(crypto_bdev->crypto_bdev.name);
1220 	free(crypto_bdev);
1221 }
1222 
/* Message-handler wrapper around spdk_bdev_close(); ctx is the descriptor of
 * the base bdev to close.
 */
static void
_vbdev_crypto_destruct(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
1231 
1232 /* Called after we've unregistered following a hot remove callback.
1233  * Our finish entry point will be called next.
1234  */
1235 static int
1236 vbdev_crypto_destruct(void *ctx)
1237 {
1238 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1239 
1240 	/* Remove this device from the internal list */
1241 	TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
1242 
1243 	/* Unclaim the underlying bdev. */
1244 	spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
1245 
1246 	/* Close the underlying bdev on its same opened thread. */
1247 	if (crypto_bdev->thread && crypto_bdev->thread != spdk_get_thread()) {
1248 		spdk_thread_send_msg(crypto_bdev->thread, _vbdev_crypto_destruct, crypto_bdev->base_desc);
1249 	} else {
1250 		spdk_bdev_close(crypto_bdev->base_desc);
1251 	}
1252 
1253 	/* Unregister the io_device. */
1254 	spdk_io_device_unregister(crypto_bdev, _device_unregister_cb);
1255 
1256 	g_number_of_claimed_volumes--;
1257 
1258 	return 0;
1259 }
1260 
/* Entry point through which upper layers obtain a channel to this bdev. The
 * context passed in is the one we supplied when the crypto vbdev was created,
 * i.e. the address of one of our context nodes, which is also the io_device
 * we registered.
 */
static struct spdk_io_channel *
vbdev_crypto_get_io_channel(void *ctx)
{
	/* The IO channel code allocates the SPDK channel structure plus room
	 * for our crypto_io_channel (the size given when the io_device was
	 * registered) and then runs our channel create callback to populate it.
	 */
	return spdk_get_io_channel(ctx);
}
1280 
1281 /* This is the output for bdev_get_bdevs() for this vbdev */
1282 static int
1283 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1284 {
1285 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1286 
1287 	spdk_json_write_name(w, "crypto");
1288 	spdk_json_write_object_begin(w);
1289 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1290 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1291 	spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1292 	spdk_json_write_named_string(w, "key", crypto_bdev->key);
1293 	if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1294 		spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1295 	}
1296 	spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1297 	spdk_json_write_object_end(w);
1298 	return 0;
1299 }
1300 
1301 static int
1302 vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
1303 {
1304 	struct vbdev_crypto *crypto_bdev;
1305 
1306 	TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) {
1307 		spdk_json_write_object_begin(w);
1308 		spdk_json_write_named_string(w, "method", "bdev_crypto_create");
1309 		spdk_json_write_named_object_begin(w, "params");
1310 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1311 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1312 		spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1313 		spdk_json_write_named_string(w, "key", crypto_bdev->key);
1314 		if (strcmp(crypto_bdev->cipher, AES_XTS) == 0) {
1315 			spdk_json_write_named_string(w, "key2", crypto_bdev->key);
1316 		}
1317 		spdk_json_write_named_string(w, "cipher", crypto_bdev->cipher);
1318 		spdk_json_write_object_end(w);
1319 		spdk_json_write_object_end(w);
1320 	}
1321 	return 0;
1322 }
1323 
1324 /* Helper function for the channel creation callback. */
1325 static void
1326 _assign_device_qp(struct vbdev_crypto *crypto_bdev, struct device_qp *device_qp,
1327 		  struct crypto_io_channel *crypto_ch)
1328 {
1329 	pthread_mutex_lock(&g_device_qp_lock);
1330 	if (strcmp(crypto_bdev->drv_name, QAT) == 0) {
1331 		/* For some QAT devices, the optimal qp to use is every 32nd as this spreads the
1332 		 * workload out over the multiple virtual functions in the device. For the devices
1333 		 * where this isn't the case, it doesn't hurt.
1334 		 */
1335 		TAILQ_FOREACH(device_qp, &g_device_qp_qat, link) {
1336 			if (device_qp->index != g_next_qat_index) {
1337 				continue;
1338 			}
1339 			if (device_qp->in_use == false) {
1340 				crypto_ch->device_qp = device_qp;
1341 				device_qp->in_use = true;
1342 				g_next_qat_index = (g_next_qat_index + QAT_VF_SPREAD) % g_qat_total_qp;
1343 				break;
1344 			} else {
1345 				/* if the preferred index is used, skip to the next one in this set. */
1346 				g_next_qat_index = (g_next_qat_index + 1) % g_qat_total_qp;
1347 			}
1348 		}
1349 	} else if (strcmp(crypto_bdev->drv_name, AESNI_MB) == 0) {
1350 		TAILQ_FOREACH(device_qp, &g_device_qp_aesni_mb, link) {
1351 			if (device_qp->in_use == false) {
1352 				crypto_ch->device_qp = device_qp;
1353 				device_qp->in_use = true;
1354 				break;
1355 			}
1356 		}
1357 	}
1358 	pthread_mutex_unlock(&g_device_qp_lock);
1359 }
1360 
1361 /* We provide this callback for the SPDK channel code to create a channel using
1362  * the channel struct we provided in our module get_io_channel() entry point. Here
1363  * we get and save off an underlying base channel of the device below us so that
1364  * we can communicate with the base bdev on a per channel basis. We also register the
1365  * poller used to complete crypto operations from the device.
1366  */
1367 static int
1368 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
1369 {
1370 	struct crypto_io_channel *crypto_ch = ctx_buf;
1371 	struct vbdev_crypto *crypto_bdev = io_device;
1372 	struct device_qp *device_qp = NULL;
1373 
1374 	crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
1375 	crypto_ch->poller = SPDK_POLLER_REGISTER(crypto_dev_poller, crypto_ch, 0);
1376 	crypto_ch->device_qp = NULL;
1377 
1378 	/* Assign a device/qp combination that is unique per channel per PMD. */
1379 	_assign_device_qp(crypto_bdev, device_qp, crypto_ch);
1380 	assert(crypto_ch->device_qp);
1381 
1382 	/* We use this queue to track outstanding IO in our layer. */
1383 	TAILQ_INIT(&crypto_ch->pending_cry_ios);
1384 
1385 	/* We use this to queue up crypto ops when the device is busy. */
1386 	TAILQ_INIT(&crypto_ch->queued_cry_ops);
1387 
1388 	return 0;
1389 }
1390 
1391 /* We provide this callback for the SPDK channel code to destroy a channel
1392  * created with our create callback. We just need to undo anything we did
1393  * when we created.
1394  */
1395 static void
1396 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
1397 {
1398 	struct crypto_io_channel *crypto_ch = ctx_buf;
1399 
1400 	pthread_mutex_lock(&g_device_qp_lock);
1401 	crypto_ch->device_qp->in_use = false;
1402 	pthread_mutex_unlock(&g_device_qp_lock);
1403 
1404 	spdk_poller_unregister(&crypto_ch->poller);
1405 	spdk_put_io_channel(crypto_ch->base_ch);
1406 }
1407 
1408 /* Create the association from the bdev and vbdev name and insert
1409  * on the global list. */
1410 static int
1411 vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
1412 			 const char *crypto_pmd, const char *key,
1413 			 const char *cipher, const char *key2)
1414 {
1415 	struct bdev_names *name;
1416 	int rc, j;
1417 	bool found = false;
1418 
1419 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1420 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
1421 			SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name);
1422 			return -EEXIST;
1423 		}
1424 	}
1425 
1426 	name = calloc(1, sizeof(struct bdev_names));
1427 	if (!name) {
1428 		SPDK_ERRLOG("could not allocate bdev_names\n");
1429 		return -ENOMEM;
1430 	}
1431 
1432 	name->bdev_name = strdup(bdev_name);
1433 	if (!name->bdev_name) {
1434 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
1435 		rc = -ENOMEM;
1436 		goto error_alloc_bname;
1437 	}
1438 
1439 	name->vbdev_name = strdup(vbdev_name);
1440 	if (!name->vbdev_name) {
1441 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
1442 		rc = -ENOMEM;
1443 		goto error_alloc_vname;
1444 	}
1445 
1446 	name->drv_name = strdup(crypto_pmd);
1447 	if (!name->drv_name) {
1448 		SPDK_ERRLOG("could not allocate name->drv_name\n");
1449 		rc = -ENOMEM;
1450 		goto error_alloc_dname;
1451 	}
1452 	for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
1453 		if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
1454 			found = true;
1455 			break;
1456 		}
1457 	}
1458 	if (!found) {
1459 		SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
1460 		rc = -EINVAL;
1461 		goto error_invalid_pmd;
1462 	}
1463 
1464 	name->key = strdup(key);
1465 	if (!name->key) {
1466 		SPDK_ERRLOG("could not allocate name->key\n");
1467 		rc = -ENOMEM;
1468 		goto error_alloc_key;
1469 	}
1470 	if (strnlen(name->key, (AES_CBC_KEY_LENGTH + 1)) != AES_CBC_KEY_LENGTH) {
1471 		SPDK_ERRLOG("invalid AES_CBC key length\n");
1472 		rc = -EINVAL;
1473 		goto error_invalid_key;
1474 	}
1475 
1476 	if (strncmp(cipher, AES_XTS, sizeof(AES_XTS)) == 0) {
1477 		/* To please scan-build, input validation makes sure we can't
1478 		 * have this cipher without providing a key2.
1479 		 */
1480 		name->cipher = AES_XTS;
1481 		assert(key2);
1482 		if (strnlen(key2, (AES_XTS_KEY_LENGTH + 1)) != AES_XTS_KEY_LENGTH) {
1483 			SPDK_ERRLOG("invalid AES_XTS key length\n");
1484 			rc = -EINVAL;
1485 			goto error_invalid_key2;
1486 		}
1487 
1488 		name->key2 = strdup(key2);
1489 		if (!name->key2) {
1490 			SPDK_ERRLOG("could not allocate name->key2\n");
1491 			rc = -ENOMEM;
1492 			goto error_alloc_key2;
1493 		}
1494 	} else if (strncmp(cipher, AES_CBC, sizeof(AES_CBC)) == 0) {
1495 		name->cipher = AES_CBC;
1496 	} else {
1497 		SPDK_ERRLOG("Invalid cipher: %s\n", cipher);
1498 		rc = -EINVAL;
1499 		goto error_cipher;
1500 	}
1501 
1502 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
1503 
1504 	return 0;
1505 
1506 	/* Error cleanup paths. */
1507 error_cipher:
1508 	free(name->key2);
1509 error_alloc_key2:
1510 error_invalid_key2:
1511 error_invalid_key:
1512 	free(name->key);
1513 error_alloc_key:
1514 error_invalid_pmd:
1515 	free(name->drv_name);
1516 error_alloc_dname:
1517 	free(name->vbdev_name);
1518 error_alloc_vname:
1519 	free(name->bdev_name);
1520 error_alloc_bname:
1521 	free(name);
1522 	return rc;
1523 }
1524 
/* RPC entry point for crypto vbdev creation. Records the name association
 * and, if the base bdev already exists, claims it right away; otherwise
 * creation is deferred until the base bdev shows up. Returns 0 on success
 * (including the deferred case) or the error from the failing step.
 */
int
create_crypto_disk(const char *bdev_name, const char *vbdev_name,
		   const char *crypto_pmd, const char *key,
		   const char *cipher, const char *key2)
{
	struct spdk_bdev *bdev = spdk_bdev_get_by_name(bdev_name);
	int rc;

	rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key, cipher, key2);
	if (rc != 0) {
		return rc;
	}

	if (bdev == NULL) {
		SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
		return 0;
	}

	/* NOTE(review): when the claim fails, the name entry inserted above is
	 * not removed here - confirm whether that is intended.
	 */
	return vbdev_crypto_claim(bdev);
}
1553 
1554 /* Called at driver init time, parses config file to prepare for examine calls,
1555  * also fully initializes the crypto drivers.
1556  */
1557 static int
1558 vbdev_crypto_init(void)
1559 {
1560 	struct spdk_conf_section *sp = NULL;
1561 	const char *conf_bdev_name = NULL;
1562 	const char *conf_vbdev_name = NULL;
1563 	const char *crypto_pmd = NULL;
1564 	int i;
1565 	int rc = 0;
1566 	const char *key = NULL;
1567 	const char *cipher = NULL;
1568 	const char *key2 = NULL;
1569 
1570 	/* Fully configure both SW and HW drivers. */
1571 	rc = vbdev_crypto_init_crypto_drivers();
1572 	if (rc) {
1573 		SPDK_ERRLOG("Error setting up crypto devices\n");
1574 		return rc;
1575 	}
1576 
1577 	sp = spdk_conf_find_section(NULL, "crypto");
1578 	if (sp == NULL) {
1579 		return 0;
1580 	}
1581 
1582 	for (i = 0; ; i++) {
1583 
1584 		if (!spdk_conf_section_get_nval(sp, "CRY", i)) {
1585 			break;
1586 		}
1587 
1588 		conf_bdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 0);
1589 		if (!conf_bdev_name) {
1590 			SPDK_ERRLOG("crypto configuration missing bdev name\n");
1591 			return -EINVAL;
1592 		}
1593 
1594 		conf_vbdev_name = spdk_conf_section_get_nmval(sp, "CRY", i, 1);
1595 		if (!conf_vbdev_name) {
1596 			SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n");
1597 			return -EINVAL;
1598 		}
1599 
1600 		key = spdk_conf_section_get_nmval(sp, "CRY", i, 2);
1601 		if (!key) {
1602 			SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n");
1603 			return -EINVAL;
1604 		}
1605 		SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n");
1606 
1607 		crypto_pmd = spdk_conf_section_get_nmval(sp, "CRY", i, 3);
1608 		if (!crypto_pmd) {
1609 			SPDK_ERRLOG("crypto configuration missing driver type\n");
1610 			return -EINVAL;
1611 		}
1612 
1613 		/* These are optional. */
1614 		cipher = spdk_conf_section_get_nmval(sp, "CRY", i, 4);
1615 		if (cipher == NULL) {
1616 			cipher = AES_CBC;
1617 		}
1618 		key2 = spdk_conf_section_get_nmval(sp, "CRY", i, 5);
1619 
1620 		/* Note: config file options do not support QAT AES_XTS, use RPC */
1621 		rc = vbdev_crypto_insert_name(conf_bdev_name, conf_vbdev_name,
1622 					      crypto_pmd, key, cipher, key2);
1623 		if (rc != 0) {
1624 			return rc;
1625 		}
1626 	}
1627 
1628 	return rc;
1629 }
1630 
1631 /* Called when the entire module is being torn down. */
1632 static void
1633 vbdev_crypto_finish(void)
1634 {
1635 	struct bdev_names *name;
1636 	struct vbdev_dev *device;
1637 	struct device_qp *dev_qp;
1638 	unsigned i;
1639 	int rc;
1640 
1641 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
1642 		TAILQ_REMOVE(&g_bdev_names, name, link);
1643 		free(name->drv_name);
1644 		free(name->key);
1645 		free(name->bdev_name);
1646 		free(name->vbdev_name);
1647 		free(name->key2);
1648 		free(name);
1649 	}
1650 
1651 	while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
1652 		struct rte_cryptodev *rte_dev;
1653 
1654 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
1655 		rte_cryptodev_stop(device->cdev_id);
1656 
1657 		assert(device->cdev_id < RTE_CRYPTO_MAX_DEVS);
1658 		rte_dev = &rte_cryptodevs[device->cdev_id];
1659 
1660 		if (rte_dev->dev_ops->queue_pair_release != NULL) {
1661 			for (i = 0; i < device->cdev_info.max_nb_queue_pairs; i++) {
1662 				rte_dev->dev_ops->queue_pair_release(rte_dev, i);
1663 			}
1664 		}
1665 		free(device);
1666 	}
1667 	rc = rte_vdev_uninit(AESNI_MB);
1668 	if (rc) {
1669 		SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc);
1670 	}
1671 
1672 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_qat))) {
1673 		TAILQ_REMOVE(&g_device_qp_qat, dev_qp, link);
1674 		free(dev_qp);
1675 	}
1676 
1677 	while ((dev_qp = TAILQ_FIRST(&g_device_qp_aesni_mb))) {
1678 		TAILQ_REMOVE(&g_device_qp_aesni_mb, dev_qp, link);
1679 		free(dev_qp);
1680 	}
1681 
1682 	rte_mempool_free(g_crypto_op_mp);
1683 	spdk_mempool_free(g_mbuf_mp);
1684 	rte_mempool_free(g_session_mp);
1685 	if (g_session_mp_priv != NULL) {
1686 		rte_mempool_free(g_session_mp_priv);
1687 	}
1688 }
1689 
1690 /* During init we'll be asked how much memory we'd like passed to us
1691  * in bev_io structures as context. Here's where we specify how
1692  * much context we want per IO.
1693  */
1694 static int
1695 vbdev_crypto_get_ctx_size(void)
1696 {
1697 	return sizeof(struct crypto_bdev_io);
1698 }
1699 
1700 /* Called when SPDK wants to save the current config of this vbdev module to
1701  * a file.
1702  */
1703 static void
1704 vbdev_crypto_get_spdk_running_config(FILE *fp)
1705 {
1706 	struct bdev_names *names = NULL;
1707 	fprintf(fp, "\n[crypto]\n");
1708 	TAILQ_FOREACH(names, &g_bdev_names, link) {
1709 		fprintf(fp, "  crypto %s %s ", names->bdev_name, names->vbdev_name);
1710 		fprintf(fp, "\n");
1711 	}
1712 
1713 	fprintf(fp, "\n");
1714 }
1715 
1716 /* Called when the underlying base bdev goes away. */
1717 static void
1718 vbdev_crypto_examine_hotremove_cb(void *ctx)
1719 {
1720 	struct vbdev_crypto *crypto_bdev, *tmp;
1721 	struct spdk_bdev *bdev_find = ctx;
1722 
1723 	TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
1724 		if (bdev_find == crypto_bdev->base_bdev) {
1725 			spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
1726 		}
1727 	}
1728 }
1729 
/* Per-bdev JSON config hook (fn_table.write_config_json). Deliberately
 * writes nothing: this module needs no per-bdev configuration output.
 */
static void
vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
	(void)bdev;
	(void)w;
}
1735 
1736 /* When we register our bdev this is how we specify our entry points. */
1737 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
1738 	.destruct		= vbdev_crypto_destruct,
1739 	.submit_request		= vbdev_crypto_submit_request,
1740 	.io_type_supported	= vbdev_crypto_io_type_supported,
1741 	.get_io_channel		= vbdev_crypto_get_io_channel,
1742 	.dump_info_json		= vbdev_crypto_dump_info_json,
1743 	.write_config_json	= vbdev_crypto_write_config_json
1744 };
1745 
1746 static struct spdk_bdev_module crypto_if = {
1747 	.name = "crypto",
1748 	.module_init = vbdev_crypto_init,
1749 	.config_text = vbdev_crypto_get_spdk_running_config,
1750 	.get_ctx_size = vbdev_crypto_get_ctx_size,
1751 	.examine_config = vbdev_crypto_examine,
1752 	.module_fini = vbdev_crypto_finish,
1753 	.config_json = vbdev_crypto_config_json
1754 };
1755 
1756 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
1757 
1758 static int
1759 vbdev_crypto_claim(struct spdk_bdev *bdev)
1760 {
1761 	struct bdev_names *name;
1762 	struct vbdev_crypto *vbdev;
1763 	struct vbdev_dev *device;
1764 	bool found = false;
1765 	int rc = 0;
1766 
1767 	if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
1768 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Reached max number of claimed volumes\n");
1769 		rc = -EINVAL;
1770 		goto error_vbdev_alloc;
1771 	}
1772 	g_number_of_claimed_volumes++;
1773 
1774 	/* Check our list of names from config versus this bdev and if
1775 	 * there's a match, create the crypto_bdev & bdev accordingly.
1776 	 */
1777 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1778 		if (strcmp(name->bdev_name, bdev->name) != 0) {
1779 			continue;
1780 		}
1781 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Match on %s\n", bdev->name);
1782 
1783 		vbdev = calloc(1, sizeof(struct vbdev_crypto));
1784 		if (!vbdev) {
1785 			SPDK_ERRLOG("could not allocate crypto_bdev\n");
1786 			rc = -ENOMEM;
1787 			goto error_vbdev_alloc;
1788 		}
1789 
1790 		/* The base bdev that we're attaching to. */
1791 		vbdev->base_bdev = bdev;
1792 		vbdev->crypto_bdev.name = strdup(name->vbdev_name);
1793 		if (!vbdev->crypto_bdev.name) {
1794 			SPDK_ERRLOG("could not allocate crypto_bdev name\n");
1795 			rc = -ENOMEM;
1796 			goto error_bdev_name;
1797 		}
1798 
1799 		vbdev->key = strdup(name->key);
1800 		if (!vbdev->key) {
1801 			SPDK_ERRLOG("could not allocate crypto_bdev key\n");
1802 			rc = -ENOMEM;
1803 			goto error_alloc_key;
1804 		}
1805 
1806 		if (name->key2) {
1807 			vbdev->key2 = strdup(name->key2);
1808 			if (!vbdev->key2) {
1809 				SPDK_ERRLOG("could not allocate crypto_bdev key2\n");
1810 				rc = -ENOMEM;
1811 				goto error_alloc_key2;
1812 			}
1813 		}
1814 
1815 		vbdev->drv_name = strdup(name->drv_name);
1816 		if (!vbdev->drv_name) {
1817 			SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
1818 			rc = -ENOMEM;
1819 			goto error_drv_name;
1820 		}
1821 
1822 		vbdev->crypto_bdev.product_name = "crypto";
1823 		vbdev->crypto_bdev.write_cache = bdev->write_cache;
1824 		vbdev->cipher = AES_CBC;
1825 		if (strcmp(vbdev->drv_name, QAT) == 0) {
1826 			vbdev->crypto_bdev.required_alignment =
1827 				spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
1828 			SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
1829 				       vbdev->crypto_bdev.required_alignment);
1830 			if (strcmp(name->cipher, AES_CBC) == 0) {
1831 				SPDK_NOTICELOG("QAT using cipher: AES_CBC\n");
1832 			} else {
1833 				SPDK_NOTICELOG("QAT using cipher: AES_XTS\n");
1834 				vbdev->cipher = AES_XTS;
1835 				/* DPDK expects they keys to be concatenated together. */
1836 				vbdev->xts_key = calloc(1, (AES_XTS_KEY_LENGTH * 2) + 1);
1837 				if (vbdev->xts_key == NULL) {
1838 					SPDK_ERRLOG("could not allocate memory for XTS key\n");
1839 					rc = -ENOMEM;
1840 					goto error_xts_key;
1841 				}
1842 				memcpy(vbdev->xts_key, vbdev->key, AES_XTS_KEY_LENGTH);
1843 				assert(name->key2);
1844 				memcpy(vbdev->xts_key + AES_XTS_KEY_LENGTH, name->key2, AES_XTS_KEY_LENGTH + 1);
1845 			}
1846 		} else {
1847 			vbdev->crypto_bdev.required_alignment = bdev->required_alignment;
1848 		}
1849 		/* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
1850 		 * in units of blocks.
1851 		 */
1852 		if (bdev->optimal_io_boundary > 0) {
1853 			vbdev->crypto_bdev.optimal_io_boundary =
1854 				spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
1855 		} else {
1856 			vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
1857 		}
1858 		vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
1859 		vbdev->crypto_bdev.blocklen = bdev->blocklen;
1860 		vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
1861 
1862 		/* This is the context that is passed to us when the bdev
1863 		 * layer calls in so we'll save our crypto_bdev node here.
1864 		 */
1865 		vbdev->crypto_bdev.ctxt = vbdev;
1866 		vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
1867 		vbdev->crypto_bdev.module = &crypto_if;
1868 		TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
1869 
1870 		spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
1871 					sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
1872 
1873 		rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb,
1874 				    bdev, &vbdev->base_desc);
1875 		if (rc) {
1876 			SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
1877 			goto error_open;
1878 		}
1879 
1880 		/* Save the thread where the base device is opened */
1881 		vbdev->thread = spdk_get_thread();
1882 
1883 		rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
1884 		if (rc) {
1885 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
1886 			goto error_claim;
1887 		}
1888 
1889 		/* To init the session we have to get the cryptoDev device ID for this vbdev */
1890 		TAILQ_FOREACH(device, &g_vbdev_devs, link) {
1891 			if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) {
1892 				found = true;
1893 				break;
1894 			}
1895 		}
1896 		if (found == false) {
1897 			SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n");
1898 			rc = -EINVAL;
1899 			goto error_cant_find_devid;
1900 		}
1901 
1902 		/* Get sessions. */
1903 		vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp);
1904 		if (NULL == vbdev->session_encrypt) {
1905 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1906 			rc = -EINVAL;
1907 			goto error_session_en_create;
1908 		}
1909 
1910 		vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp);
1911 		if (NULL == vbdev->session_decrypt) {
1912 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1913 			rc = -EINVAL;
1914 			goto error_session_de_create;
1915 		}
1916 
1917 		/* Init our per vbdev xform with the desired cipher options. */
1918 		vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
1919 		vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
1920 		if (strcmp(name->cipher, AES_CBC) == 0) {
1921 			vbdev->cipher_xform.cipher.key.data = vbdev->key;
1922 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
1923 			vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
1924 		} else {
1925 			vbdev->cipher_xform.cipher.key.data = vbdev->xts_key;
1926 			vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_XTS;
1927 			vbdev->cipher_xform.cipher.key.length = AES_XTS_KEY_LENGTH * 2;
1928 		}
1929 		vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
1930 
1931 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
1932 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
1933 						    &vbdev->cipher_xform,
1934 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1935 		if (rc < 0) {
1936 			SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
1937 			rc = -EINVAL;
1938 			goto error_session_init;
1939 		}
1940 
1941 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
1942 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
1943 						    &vbdev->cipher_xform,
1944 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1945 		if (rc < 0) {
1946 			SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
1947 			rc = -EINVAL;
1948 			goto error_session_init;
1949 		}
1950 
1951 		rc = spdk_bdev_register(&vbdev->crypto_bdev);
1952 		if (rc < 0) {
1953 			SPDK_ERRLOG("ERROR trying to register bdev\n");
1954 			rc = -EINVAL;
1955 			goto error_bdev_register;
1956 		}
1957 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "registered io_device and virtual bdev for: %s\n",
1958 			      name->vbdev_name);
1959 		break;
1960 	}
1961 
1962 	return rc;
1963 
1964 	/* Error cleanup paths. */
1965 error_bdev_register:
1966 error_session_init:
1967 	rte_cryptodev_sym_session_free(vbdev->session_decrypt);
1968 error_session_de_create:
1969 	rte_cryptodev_sym_session_free(vbdev->session_encrypt);
1970 error_session_en_create:
1971 error_cant_find_devid:
1972 error_claim:
1973 	spdk_bdev_close(vbdev->base_desc);
1974 error_open:
1975 	TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
1976 	spdk_io_device_unregister(vbdev, NULL);
1977 	free(vbdev->xts_key);
1978 error_xts_key:
1979 	free(vbdev->drv_name);
1980 error_drv_name:
1981 	free(vbdev->key2);
1982 error_alloc_key2:
1983 	free(vbdev->key);
1984 error_alloc_key:
1985 	free(vbdev->crypto_bdev.name);
1986 error_bdev_name:
1987 	free(vbdev);
1988 error_vbdev_alloc:
1989 	g_number_of_claimed_volumes--;
1990 	return rc;
1991 }
1992 
1993 /* RPC entry for deleting a crypto vbdev. */
1994 void
1995 delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
1996 		   void *cb_arg)
1997 {
1998 	struct bdev_names *name;
1999 
2000 	if (!bdev || bdev->module != &crypto_if) {
2001 		cb_fn(cb_arg, -ENODEV);
2002 		return;
2003 	}
2004 
2005 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
2006 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
2007 	 * unless the underlying bdev was hot-removed.
2008 	 */
2009 	TAILQ_FOREACH(name, &g_bdev_names, link) {
2010 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
2011 			TAILQ_REMOVE(&g_bdev_names, name, link);
2012 			free(name->bdev_name);
2013 			free(name->vbdev_name);
2014 			free(name->drv_name);
2015 			free(name->key);
2016 			free(name->key2);
2017 			free(name);
2018 			break;
2019 		}
2020 	}
2021 
2022 	/* Additional cleanup happens in the destruct callback. */
2023 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
2024 }
2025 
2026 /* Because we specified this function in our crypto bdev function table when we
2027  * registered our crypto bdev, we'll get this call anytime a new bdev shows up.
2028  * Here we need to decide if we care about it and if so what to do. We
2029  * parsed the config file at init so we check the new bdev against the list
2030  * we built up at that time and if the user configured us to attach to this
2031  * bdev, here's where we do it.
2032  */
2033 static void
2034 vbdev_crypto_examine(struct spdk_bdev *bdev)
2035 {
2036 	vbdev_crypto_claim(bdev);
2037 	spdk_bdev_module_examine_done(&crypto_if);
2038 }
2039 
2040 SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_CRYPTO)
2041