xref: /spdk/module/bdev/crypto/vbdev_crypto.c (revision be4a5602ce7d3e2d9cc7ff6cde0b0dcb99d647c8)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "vbdev_crypto.h"
35 
36 #include "spdk/env.h"
37 #include "spdk/conf.h"
38 #include "spdk/endian.h"
39 #include "spdk/io_channel.h"
40 #include "spdk/bdev_module.h"
41 #include "spdk_internal/log.h"
42 
43 #include <rte_config.h>
44 #include <rte_version.h>
45 #include <rte_bus_vdev.h>
46 #include <rte_crypto.h>
47 #include <rte_cryptodev.h>
48 #include <rte_cryptodev_pmd.h>
49 
50 /* Crypto driver types supported by this module. To add support for a new device
51  * type, follow the pattern of the existing entries below. Note that the string names
52  * are defined by the DPDK PMD in question so be sure to use the exact names.
53  */
54 #define MAX_NUM_DRV_TYPES 2
55 #define AESNI_MB "crypto_aesni_mb"
56 #define QAT "crypto_qat"
57 const char *g_driver_names[MAX_NUM_DRV_TYPES] = { AESNI_MB, QAT };
58 
59 /* Global list of available crypto devices. create_vbdev_dev() appends one entry per cryptodev it brings up. */
60 struct vbdev_dev {
61 	struct rte_cryptodev_info	cdev_info;	/* includes device friendly name */
62 	uint8_t				cdev_id;	/* identifier for the device */
63 	TAILQ_ENTRY(vbdev_dev)		link;		/* entry on g_vbdev_devs */
64 };
65 static TAILQ_HEAD(, vbdev_dev) g_vbdev_devs = TAILQ_HEAD_INITIALIZER(g_vbdev_devs);
66 
67 /* Global list and lock for unique device/queue pair combos. create_vbdev_dev() adds one entry per queue pair of each device. */
68 struct device_qp {
69 	struct vbdev_dev		*device;	/* ptr to crypto device */
70 	uint8_t				qp;		/* queue pair for this node */
71 	bool				in_use;		/* whether this node is in use or not */
72 	TAILQ_ENTRY(device_qp)		link;		/* entry on g_device_qp */
73 };
74 static TAILQ_HEAD(, device_qp) g_device_qp = TAILQ_HEAD_INITIALIZER(g_device_qp);
75 static pthread_mutex_t g_device_qp_lock = PTHREAD_MUTEX_INITIALIZER;
76 
77 
78 /* In order to limit the number of resources we need to do one crypto
79  * operation per LBA (we use LBA as IV), we tell the bdev layer that
80  * our max IO size is something reasonable. Units here are in bytes.
81  */
82 #define CRYPTO_MAX_IO		(64 * 1024)
83 
84 /* This controls how many ops will be dequeued from the crypto driver in one run
85  * of the poller. It is mainly a performance knob as it effectively determines how
86  * much work the poller has to do.  However even that can vary between crypto drivers
87  * as the AESNI_MB driver for example does all the crypto work on dequeue whereas the
88  * QAT driver just dequeues what has been completed already.
89  */
90 #define MAX_DEQUEUE_BURST_SIZE	64
91 
92 /* When enqueueing, we need to supply the crypto driver with an array of pointers to
93  * operation structs. As each of these can be max 512B, we can adjust the CRYPTO_MAX_IO
94  * value in conjunction with the other defines to make sure we're not using crazy amounts
95  * of memory. All of these numbers can and probably should be adjusted based on the
96  * workload. By default we'll use the worst case (smallest) block size for the
97  * minimum number of array entries. As an example, a CRYPTO_MAX_IO size of 64K with 512B
98  * blocks would give us an enqueue array size of 128.
99  */
100 #define MAX_ENQUEUE_ARRAY_SIZE (CRYPTO_MAX_IO / 512)
101 
102 /* The number of MBUFS we need must be a power of two and to support other small IOs
103  * in addition to the limits mentioned above, we go to the next power of two. It is a
104  * big number because it is one mempool for source and destination mbufs. It may
105  * need to be bigger to support multiple crypto drivers at once.
106  */
107 #define NUM_MBUFS		32768
108 #define POOL_CACHE_SIZE		256
109 #define MAX_CRYPTO_VOLUMES	128
110 #define NUM_SESSIONS		(2 * MAX_CRYPTO_VOLUMES)
111 #define SESS_MEMPOOL_CACHE_SIZE 0
112 uint8_t g_number_of_claimed_volumes = 0;
113 
114 /* This is the max number of IOs we can supply to any crypto device QP at one time.
115  * It can vary between drivers.
116  */
117 #define CRYPTO_QP_DESCRIPTORS	2048
118 
119 /* Specific to AES_CBC. */
120 #define AES_CBC_IV_LENGTH	16
121 #define AES_CBC_KEY_LENGTH	16
122 
123 /* Common for supported devices. IV_OFFSET is where the IV is placed in each crypto op, right after the op and sym op structs. */
124 #define IV_OFFSET            (sizeof(struct rte_crypto_op) + \
125 				sizeof(struct rte_crypto_sym_op))
126 #define QUEUED_OP_OFFSET (IV_OFFSET + AES_CBC_IV_LENGTH)	/* first byte past the IV */
127 
128 static void _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);	/* forward declarations */
129 static void _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
130 static void _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);	/* completion cb for the write issued after encryption */
131 static void vbdev_crypto_examine(struct spdk_bdev *bdev);
132 static int vbdev_crypto_claim(struct spdk_bdev *bdev);
133 static void vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
134 
135 /* List of crypto vbdev names, each with its base bdev, key and crypto driver name, as given via configuration file. */
136 struct bdev_names {
137 	char			*vbdev_name;	/* name of the vbdev to create */
138 	char			*bdev_name;	/* base bdev name */
139 
140 	/* Note: for dev/test we allow the key to be given in the config file; for
141 	 * production use, you must use an RPC to specify the key for security reasons.
142 	 */
143 	uint8_t			*key;		/* key per bdev */
144 	char			*drv_name;	/* name of the crypto device driver */
145 	TAILQ_ENTRY(bdev_names)	link;		/* entry on g_bdev_names */
146 };
147 static TAILQ_HEAD(, bdev_names) g_bdev_names = TAILQ_HEAD_INITIALIZER(g_bdev_names);
148 
149 /* List of virtual bdevs and associated info for each. We keep the device friendly name here even
150  * though it's also in the device struct because we use it early on.
151  */
152 struct vbdev_crypto {
153 	struct spdk_bdev		*base_bdev;		/* the thing we're attaching to */
154 	struct spdk_bdev_desc		*base_desc;		/* its descriptor we get from open */
155 	struct spdk_bdev		crypto_bdev;		/* the crypto virtual bdev */
156 	uint8_t				*key;			/* key per bdev */
157 	char				*drv_name;		/* name of the crypto device driver */
158 	struct rte_cryptodev_sym_session *session_encrypt;	/* encryption session for this bdev */
159 	struct rte_cryptodev_sym_session *session_decrypt;	/* decryption session for this bdev */
160 	struct rte_crypto_sym_xform	cipher_xform;		/* crypto control struct for this bdev */
161 	TAILQ_ENTRY(vbdev_crypto)	link;			/* entry on g_vbdev_crypto */
162 };
163 static TAILQ_HEAD(, vbdev_crypto) g_vbdev_crypto = TAILQ_HEAD_INITIALIZER(g_vbdev_crypto);
164 
165 /* Shared mempools between all devices on this system, created by vbdev_crypto_init_crypto_drivers() */
166 static struct rte_mempool *g_session_mp = NULL;		/* session pool; non-NULL also marks the drivers as initialized */
167 static struct rte_mempool *g_session_mp_priv = NULL;	/* private session data, only created for DPDK >= 19.02 */
168 static struct spdk_mempool *g_mbuf_mp = NULL;		/* mbuf mempool */
169 static struct rte_mempool *g_crypto_op_mp = NULL;	/* crypto operations, must be rte* mempool */
170 
171 /* For queueing up crypto operations that we can't submit for some reason; the poller retries them later */
172 struct vbdev_crypto_op {
173 	uint8_t					cdev_id;	/* crypto device to resubmit to */
174 	uint8_t					qp;		/* queue pair to resubmit on */
175 	struct rte_crypto_op			*crypto_op;	/* the op that still needs to be enqueued */
176 	struct spdk_bdev_io			*bdev_io;	/* bdev_io this op belongs to */
177 	TAILQ_ENTRY(vbdev_crypto_op)		link;		/* entry on a channel's queued_cry_ops */
178 };
179 #define QUEUED_OP_LENGTH (sizeof(struct vbdev_crypto_op))	/* part of the per-op private size of the crypto op pool */
180 
181 /* The crypto vbdev channel struct. It is allocated and freed on our behalf by the io channel code.
182  * We store things in here that are needed on a per thread basis like the base_channel for this thread,
183  * and the poller for this thread.
184  */
185 struct crypto_io_channel {
186 	struct spdk_io_channel		*base_ch;		/* IO channel of base device */
187 	struct spdk_poller		*poller;		/* completion poller */
188 	struct device_qp		*device_qp;		/* unique device/qp combination for this channel */
189 	TAILQ_HEAD(, spdk_bdev_io)	pending_cry_ios;	/* outstanding operations to the crypto device */
190 	struct spdk_io_channel_iter	*iter;			/* used with for_each_channel in reset */
191 	TAILQ_HEAD(, vbdev_crypto_op)	queued_cry_ops;		/* queued for re-submission to CryptoDev */
192 };
193 
194 /* This is the crypto per IO context that the bdev layer allocates for us opaquely and attaches to
195  * each IO for us.
196  */
197 struct crypto_bdev_io {
198 	int cryop_cnt_remaining;			/* counter used when completing crypto ops */
199 	struct crypto_io_channel *crypto_ch;		/* need to store for crypto completion handling */
200 	struct vbdev_crypto *crypto_bdev;		/* the crypto node struct associated with this IO */
201 	struct spdk_bdev_io *orig_io;			/* the original IO */
202 	struct spdk_bdev_io *read_io;			/* the read IO we issued */
203 	int8_t bdev_io_status;				/* the status we'll report back on the bdev IO */
204 	bool on_pending_list;				/* set once the bdev_io has been put on pending_cry_ios */
205 	/* Used for the single contiguous buffer that serves as the crypto destination target for writes */
206 	uint64_t cry_num_blocks;			/* num of blocks for the contiguous buffer */
207 	uint64_t cry_offset_blocks;			/* block offset on media */
208 	struct iovec cry_iov;				/* iov representing contig write buffer */
209 
210 	/* for bdev_io_wait */
211 	struct spdk_bdev_io_wait_entry bdev_io_wait;
212 	struct spdk_io_channel *ch;
213 };
214 
215 /* Called by vbdev_crypto_init_crypto_drivers() to init each discovered crypto device */
216 static int
217 create_vbdev_dev(uint8_t index, uint16_t num_lcores)
218 {
219 	struct vbdev_dev *device;
220 	uint8_t j, cdev_id, cdrv_id;
221 	struct device_qp *dev_qp;
222 	struct device_qp *tmp_qp;
223 	int rc;
224 
225 	device = calloc(1, sizeof(struct vbdev_dev));
226 	if (!device) {
227 		return -ENOMEM;
228 	}
229 
230 	/* Get details about this device. */
231 	rte_cryptodev_info_get(index, &device->cdev_info);
232 	cdrv_id = device->cdev_info.driver_id;
233 	cdev_id = device->cdev_id = index;
234 
235 	/* Before going any further, make sure we have enough resources for this
236 	 * device type to function.  We need a unique queue pair per core accross each
237 	 * device type to remain lockless....
238 	 */
239 	if ((rte_cryptodev_device_count_by_driver(cdrv_id) *
240 	     device->cdev_info.max_nb_queue_pairs) < num_lcores) {
241 		SPDK_ERRLOG("Insufficient unique queue pairs available for %s\n",
242 			    device->cdev_info.driver_name);
243 		SPDK_ERRLOG("Either add more crypto devices or decrease core count\n");
244 		rc = -EINVAL;
245 		goto err;
246 	}
247 
248 	/* Setup queue pairs. */
249 	struct rte_cryptodev_config conf = {
250 		.nb_queue_pairs = device->cdev_info.max_nb_queue_pairs,
251 		.socket_id = SPDK_ENV_SOCKET_ID_ANY
252 	};
253 
254 	rc = rte_cryptodev_configure(cdev_id, &conf);
255 	if (rc < 0) {
256 		SPDK_ERRLOG("Failed to configure cryptodev %u\n", cdev_id);
257 		rc = -EINVAL;
258 		goto err;
259 	}
260 
261 	struct rte_cryptodev_qp_conf qp_conf = {
262 		.nb_descriptors = CRYPTO_QP_DESCRIPTORS,
263 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
264 		.mp_session = g_session_mp,
265 		.mp_session_private = g_session_mp_priv,
266 #endif
267 	};
268 
269 	/* Pre-setup all potential qpairs now and assign them in the channel
270 	 * callback. If we were to create them there, we'd have to stop the
271 	 * entire device affecting all other threads that might be using it
272 	 * even on other queue pairs.
273 	 */
274 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
275 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
276 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY);
277 #else
278 		rc = rte_cryptodev_queue_pair_setup(cdev_id, j, &qp_conf, SOCKET_ID_ANY,
279 						    g_session_mp);
280 #endif
281 
282 		if (rc < 0) {
283 			SPDK_ERRLOG("Failed to setup queue pair %u on "
284 				    "cryptodev %u\n", j, cdev_id);
285 			rc = -EINVAL;
286 			goto err;
287 		}
288 	}
289 
290 	rc = rte_cryptodev_start(cdev_id);
291 	if (rc < 0) {
292 		SPDK_ERRLOG("Failed to start device %u: error %d\n",
293 			    cdev_id, rc);
294 		rc = -EINVAL;
295 		goto err;
296 	}
297 
298 	/* Build up list of device/qp combinations */
299 	for (j = 0; j < device->cdev_info.max_nb_queue_pairs; j++) {
300 		dev_qp = calloc(1, sizeof(struct device_qp));
301 		if (!dev_qp) {
302 			rc = -ENOMEM;
303 			goto err;
304 		}
305 		dev_qp->device = device;
306 		dev_qp->qp = j;
307 		dev_qp->in_use = false;
308 		TAILQ_INSERT_TAIL(&g_device_qp, dev_qp, link);
309 	}
310 
311 	/* Add to our list of available crypto devices. */
312 	TAILQ_INSERT_TAIL(&g_vbdev_devs, device, link);
313 
314 	return 0;
315 err:
316 	TAILQ_FOREACH_SAFE(dev_qp, &g_device_qp, link, tmp_qp) {
317 		TAILQ_REMOVE(&g_device_qp, dev_qp, link);
318 		free(dev_qp);
319 	}
320 	free(device);
321 
322 	return rc;
323 
324 }
325 
326 /* This is called from the module's init function. We setup all crypto devices early on as we are unable
327  * to easily dynamically configure queue pairs after the drivers are up and running.  So, here, we
328  * configure the max capabilities of each device and assign threads to queue pairs as channels are
329  * requested.
330  */
331 static int
332 vbdev_crypto_init_crypto_drivers(void)
333 {
334 	uint8_t cdev_count;
335 	uint8_t cdev_id, i;
336 	int rc = 0;
337 	struct vbdev_dev *device;
338 	struct vbdev_dev *tmp_dev;
339 	unsigned int max_sess_size = 0, sess_size;
340 	uint16_t num_lcores = rte_lcore_count();
341 
342 	/* Only the first call, via RPC or module init should init the crypto drivers. */
343 	if (g_session_mp != NULL) {
344 		return 0;
345 	}
346 
347 	/* We always init AESNI_MB */
348 	rc = rte_vdev_init(AESNI_MB, NULL);
349 	if (rc) {
350 		SPDK_ERRLOG("error creating virtual PMD %s\n", AESNI_MB);
351 		return -EINVAL;
352 	}
353 
354 	/* If we have no crypto devices, there's no reason to continue. */
355 	cdev_count = rte_cryptodev_count();
356 	if (cdev_count == 0) {
357 		return 0;
358 	}
359 
360 	/*
361 	 * Create global mempools, shared by all devices regardless of type.
362 	 */
363 
364 	/* First determine max session size, most pools are shared by all the devices,
365 	 * so we need to find the global max sessions size.
366 	 */
367 	for (cdev_id = 0; cdev_id < cdev_count; cdev_id++) {
368 		sess_size = rte_cryptodev_sym_get_private_session_size(cdev_id);
369 		if (sess_size > max_sess_size) {
370 			max_sess_size = sess_size;
371 		}
372 	}
373 
374 #if RTE_VERSION >= RTE_VERSION_NUM(19, 02, 0, 0)
375 	g_session_mp_priv = rte_mempool_create("session_mp_priv", NUM_SESSIONS, max_sess_size,
376 					       SESS_MEMPOOL_CACHE_SIZE, 0, NULL, NULL, NULL,
377 					       NULL, SOCKET_ID_ANY, 0);
378 	if (g_session_mp_priv == NULL) {
379 		SPDK_ERRLOG("Cannot create private session pool max size 0x%x\n", max_sess_size);
380 		return -ENOMEM;
381 	}
382 
383 	g_session_mp = rte_cryptodev_sym_session_pool_create(
384 			       "session_mp",
385 			       NUM_SESSIONS, 0, SESS_MEMPOOL_CACHE_SIZE, 0,
386 			       SOCKET_ID_ANY);
387 #else
388 	g_session_mp = rte_mempool_create("session_mp", NUM_SESSIONS, max_sess_size,
389 					  SESS_MEMPOOL_CACHE_SIZE,
390 					  0, NULL, NULL, NULL, NULL, SOCKET_ID_ANY, 0);
391 #endif
392 	if (g_session_mp == NULL) {
393 		SPDK_ERRLOG("Cannot create session pool max size 0x%x\n", max_sess_size);
394 		goto error_create_session_mp;
395 		return -ENOMEM;
396 	}
397 
398 	g_mbuf_mp = spdk_mempool_create("mbuf_mp", NUM_MBUFS, sizeof(struct rte_mbuf),
399 					SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
400 					SPDK_ENV_SOCKET_ID_ANY);
401 	if (g_mbuf_mp == NULL) {
402 		SPDK_ERRLOG("Cannot create mbuf pool\n");
403 		rc = -ENOMEM;
404 		goto error_create_mbuf;
405 	}
406 
407 	/* We use per op private data to store the IV and our own struct
408 	 * for queueing ops.
409 	 */
410 	g_crypto_op_mp = rte_crypto_op_pool_create("op_mp",
411 			 RTE_CRYPTO_OP_TYPE_SYMMETRIC,
412 			 NUM_MBUFS,
413 			 POOL_CACHE_SIZE,
414 			 AES_CBC_IV_LENGTH + QUEUED_OP_LENGTH,
415 			 rte_socket_id());
416 
417 	if (g_crypto_op_mp == NULL) {
418 		SPDK_ERRLOG("Cannot create op pool\n");
419 		rc = -ENOMEM;
420 		goto error_create_op;
421 	}
422 
423 	/* Init all devices */
424 	for (i = 0; i < cdev_count; i++) {
425 		rc = create_vbdev_dev(i, num_lcores);
426 		if (rc) {
427 			goto err;
428 		}
429 	}
430 	return 0;
431 
432 	/* Error cleanup paths. */
433 err:
434 	TAILQ_FOREACH_SAFE(device, &g_vbdev_devs, link, tmp_dev) {
435 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
436 		free(device);
437 	}
438 	rte_mempool_free(g_crypto_op_mp);
439 	g_crypto_op_mp = NULL;
440 error_create_op:
441 	spdk_mempool_free(g_mbuf_mp);
442 	g_mbuf_mp = NULL;
443 error_create_mbuf:
444 	rte_mempool_free(g_session_mp);
445 	g_session_mp = NULL;
446 error_create_session_mp:
447 	if (g_session_mp_priv != NULL) {
448 		rte_mempool_free(g_session_mp_priv);
449 		g_session_mp_priv = NULL;
450 	}
451 	return rc;
452 }
453 
454 /* Following an encrypt or decrypt we need to then either write the encrypted data or finish
455  * the read on decrypted data. Do that here.
456  */
457 static void
458 _crypto_operation_complete(struct spdk_bdev_io *bdev_io)
459 {
460 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
461 					   crypto_bdev);
462 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
463 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
464 	struct spdk_bdev_io *free_me = io_ctx->read_io;
465 	int rc = 0;
466 
467 	TAILQ_REMOVE(&crypto_ch->pending_cry_ios, bdev_io, module_link);
468 
469 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
470 
471 		/* Complete the original IO and then free the one that we created
472 		 * as a result of issuing an IO via submit_request.
473 		 */
474 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
475 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
476 		} else {
477 			SPDK_ERRLOG("Issue with decryption on bdev_io %p\n", bdev_io);
478 			rc = -EINVAL;
479 		}
480 		spdk_bdev_free_io(free_me);
481 
482 	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
483 
484 		if (io_ctx->bdev_io_status != SPDK_BDEV_IO_STATUS_FAILED) {
485 			/* Write the encrypted data. */
486 			rc = spdk_bdev_writev_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
487 						     &io_ctx->cry_iov, 1, io_ctx->cry_offset_blocks,
488 						     io_ctx->cry_num_blocks, _complete_internal_write,
489 						     bdev_io);
490 		} else {
491 			SPDK_ERRLOG("Issue with encryption on bdev_io %p\n", bdev_io);
492 			rc = -EINVAL;
493 		}
494 
495 	} else {
496 		SPDK_ERRLOG("Unknown bdev type %u on crypto operation completion\n",
497 			    bdev_io->type);
498 		rc = -EINVAL;
499 	}
500 
501 	if (rc) {
502 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
503 	}
504 }
505 
506 static int _crypto_operation(struct spdk_bdev_io *bdev_io,
507 			     enum rte_crypto_cipher_operation crypto_op);	/* forward declaration, defined below the poller */
508 
509 /* This is the poller for the crypto device. It uses a single API to dequeue whatever is ready at
510  * the device. Then we need to decide if what we've got so far (including previous poller
511  * runs) totals up to one or more complete bdev_ios and if so continue with the bdev_io
512  * accordingly. This means either completing a read or issuing a new write.
513  */
514 static int
515 crypto_dev_poller(void *args)
516 {
517 	struct crypto_io_channel *crypto_ch = args;
518 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
519 	int i, num_dequeued_ops, num_enqueued_ops;
520 	struct spdk_bdev_io *bdev_io = NULL;
521 	struct crypto_bdev_io *io_ctx = NULL;
522 	struct rte_crypto_op *dequeued_ops[MAX_DEQUEUE_BURST_SIZE];
523 	struct rte_crypto_op *mbufs_to_free[2 * MAX_DEQUEUE_BURST_SIZE];
524 	int num_mbufs = 0;
525 	struct vbdev_crypto_op *op_to_resubmit;
526 
527 	/* Each run of the poller will get just what the device has available
528 	 * at the moment we call it, we don't check again after draining the
529 	 * first batch.
530 	 */
531 	num_dequeued_ops = rte_cryptodev_dequeue_burst(cdev_id, crypto_ch->device_qp->qp,
532 			   dequeued_ops, MAX_DEQUEUE_BURST_SIZE);
533 
534 	/* Check if operation was processed successfully */
535 	for (i = 0; i < num_dequeued_ops; i++) {
536 
537 		/* We don't know the order or association of the crypto ops wrt any
538 		 * partiular bdev_io so need to look at each and determine if it's
539 		 * the last one for it's bdev_io or not.
540 		 */
541 		bdev_io = (struct spdk_bdev_io *)dequeued_ops[i]->sym->m_src->userdata;
542 		assert(bdev_io != NULL);
543 		io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
544 
545 		if (dequeued_ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
546 			SPDK_ERRLOG("error with op %d status %u\n", i,
547 				    dequeued_ops[i]->status);
548 			/* Update the bdev status to error, we'll still process the
549 			 * rest of the crypto ops for this bdev_io though so they
550 			 * aren't left hanging.
551 			 */
552 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
553 		}
554 
555 		assert(io_ctx->cryop_cnt_remaining > 0);
556 
557 		/* Return the associated src and dst mbufs by collecting them into
558 		 * an array that we can use the bulk API to free after the loop.
559 		 */
560 		dequeued_ops[i]->sym->m_src->userdata = NULL;
561 		mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_src;
562 		if (dequeued_ops[i]->sym->m_dst) {
563 			mbufs_to_free[num_mbufs++] = (void *)dequeued_ops[i]->sym->m_dst;
564 		}
565 
566 		/* done encrypting, complete the bdev_io */
567 		if (--io_ctx->cryop_cnt_remaining == 0) {
568 
569 			/* If we're completing this with an outstanding reset we need
570 			 * to fail it.
571 			 */
572 			if (crypto_ch->iter) {
573 				bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
574 			}
575 
576 			/* Complete the IO */
577 			_crypto_operation_complete(bdev_io);
578 		}
579 	}
580 
581 	/* Now bulk free both mbufs and crypto operations. */
582 	if (num_dequeued_ops > 0) {
583 		rte_mempool_put_bulk(g_crypto_op_mp,
584 				     (void **)dequeued_ops,
585 				     num_dequeued_ops);
586 		assert(num_mbufs > 0);
587 		spdk_mempool_put_bulk(g_mbuf_mp,
588 				      (void **)mbufs_to_free,
589 				      num_mbufs);
590 	}
591 
592 	/* Check if there are any pending crypto ops to process */
593 	while (!TAILQ_EMPTY(&crypto_ch->queued_cry_ops)) {
594 		op_to_resubmit = TAILQ_FIRST(&crypto_ch->queued_cry_ops);
595 		io_ctx = (struct crypto_bdev_io *)op_to_resubmit->bdev_io->driver_ctx;
596 		num_enqueued_ops = rte_cryptodev_enqueue_burst(op_to_resubmit->cdev_id,
597 				   op_to_resubmit->qp,
598 				   &op_to_resubmit->crypto_op,
599 				   1);
600 		if (num_enqueued_ops == 1) {
601 			/* Make sure we don't put this on twice as one bdev_io is made up
602 			 * of many crypto ops.
603 			 */
604 			if (io_ctx->on_pending_list == false) {
605 				TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, op_to_resubmit->bdev_io, module_link);
606 				io_ctx->on_pending_list = true;
607 			}
608 			TAILQ_REMOVE(&crypto_ch->queued_cry_ops, op_to_resubmit, link);
609 		} else {
610 			/* if we couldn't get one, just break and try again later. */
611 			break;
612 		}
613 	}
614 
615 	/* If the channel iter is not NULL, we need to continue to poll
616 	 * until the pending list is empty, then we can move on to the
617 	 * next channel.
618 	 */
619 	if (crypto_ch->iter && TAILQ_EMPTY(&crypto_ch->pending_cry_ios)) {
620 		SPDK_NOTICELOG("Channel %p has been quiesced.\n", crypto_ch);
621 		spdk_for_each_channel_continue(crypto_ch->iter, 0);
622 		crypto_ch->iter = NULL;
623 	}
624 
625 	return num_dequeued_ops;
626 }
627 
628 /* We're either encrypting on the way down or decrypting on the way back. */
629 static int
630 _crypto_operation(struct spdk_bdev_io *bdev_io, enum rte_crypto_cipher_operation crypto_op)
631 {
632 	uint16_t num_enqueued_ops = 0;
633 	uint32_t cryop_cnt = bdev_io->u.bdev.num_blocks;
634 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
635 	struct crypto_io_channel *crypto_ch = io_ctx->crypto_ch;
636 	uint8_t cdev_id = crypto_ch->device_qp->device->cdev_id;
637 	uint32_t crypto_len = io_ctx->crypto_bdev->crypto_bdev.blocklen;
638 	uint64_t total_length = bdev_io->u.bdev.num_blocks * crypto_len;
639 	int rc;
640 	uint32_t iov_index = 0;
641 	uint32_t allocated = 0;
642 	uint8_t *current_iov = NULL;
643 	uint64_t total_remaining = 0;
644 	uint64_t updated_length, current_iov_remaining = 0;
645 	uint32_t crypto_index = 0;
646 	uint32_t en_offset = 0;
647 	struct rte_crypto_op *crypto_ops[MAX_ENQUEUE_ARRAY_SIZE];
648 	struct rte_mbuf *src_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
649 	struct rte_mbuf *dst_mbufs[MAX_ENQUEUE_ARRAY_SIZE];
650 	int burst;
651 	struct vbdev_crypto_op *op_to_queue;
652 
653 	assert((bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen) <= CRYPTO_MAX_IO);
654 
655 	/* Get the number of source mbufs that we need. These will always be 1:1 because we
656 	 * don't support chaining. The reason we don't is because of our decision to use
657 	 * LBA as IV, there can be no case where we'd need >1 mbuf per crypto op or the
658 	 * op would be > 1 LBA.
659 	 */
660 	rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&src_mbufs[0], cryop_cnt);
661 	if (rc) {
662 		SPDK_ERRLOG("ERROR trying to get src_mbufs!\n");
663 		return -ENOMEM;
664 	}
665 
666 	/* Get the same amount but these buffers to describe the encrypted data location (dst). */
667 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
668 		rc = spdk_mempool_get_bulk(g_mbuf_mp, (void **)&dst_mbufs[0], cryop_cnt);
669 		if (rc) {
670 			SPDK_ERRLOG("ERROR trying to get dst_mbufs!\n");
671 			rc = -ENOMEM;
672 			goto error_get_dst;
673 		}
674 	}
675 
676 #ifdef __clang_analyzer__
677 	/* silence scan-build false positive */
678 	SPDK_CLANG_ANALYZER_PREINIT_PTR_ARRAY(crypto_ops, MAX_ENQUEUE_ARRAY_SIZE, 0x1000);
679 #endif
680 	/* Allocate crypto operations. */
681 	allocated = rte_crypto_op_bulk_alloc(g_crypto_op_mp,
682 					     RTE_CRYPTO_OP_TYPE_SYMMETRIC,
683 					     crypto_ops, cryop_cnt);
684 	if (allocated < cryop_cnt) {
685 		SPDK_ERRLOG("ERROR trying to get crypto ops!\n");
686 		rc = -ENOMEM;
687 		goto error_get_ops;
688 	}
689 
690 	/* For encryption, we need to prepare a single contiguous buffer as the encryption
691 	 * destination, we'll then pass that along for the write after encryption is done.
692 	 * This is done to avoiding encrypting the provided write buffer which may be
693 	 * undesirable in some use cases.
694 	 */
695 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
696 		io_ctx->cry_iov.iov_len = total_length;
697 		/* For now just allocate in the I/O path, not optimal but the current bdev API
698 		 * for getting a buffer from the pool won't work if the bdev_io passed in
699 		 * has a buffer, which ours always will.  So, until we modify that API
700 		 * or better yet the current ZCOPY work lands, this is the best we can do.
701 		 */
702 		io_ctx->cry_iov.iov_base = spdk_malloc(total_length,
703 						       spdk_bdev_get_buf_align(bdev_io->bdev), NULL,
704 						       SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
705 		if (!io_ctx->cry_iov.iov_base) {
706 			SPDK_ERRLOG("ERROR trying to allocate write buffer for encryption!\n");
707 			rc = -ENOMEM;
708 			goto error_get_write_buffer;
709 		}
710 		io_ctx->cry_offset_blocks = bdev_io->u.bdev.offset_blocks;
711 		io_ctx->cry_num_blocks = bdev_io->u.bdev.num_blocks;
712 	}
713 
714 	/* This value is used in the completion callback to determine when the bdev_io is
715 	 * complete.
716 	 */
717 	io_ctx->cryop_cnt_remaining = cryop_cnt;
718 
719 	/* As we don't support chaining because of a decision to use LBA as IV, construction
720 	 * of crypto operations is straightforward. We build both the op, the mbuf and the
721 	 * dst_mbuf in our local arrays by looping through the length of the bdev IO and
722 	 * picking off LBA sized blocks of memory from the IOVs as we walk through them. Each
723 	 * LBA sized chunk of memory will correspond 1:1 to a crypto operation and a single
724 	 * mbuf per crypto operation.
725 	 */
726 	total_remaining = total_length;
727 	current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
728 	current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
729 	do {
730 		uint8_t *iv_ptr;
731 		uint64_t op_block_offset;
732 
733 		/* Set the mbuf elements address and length. Null out the next pointer. */
734 		src_mbufs[crypto_index]->buf_addr = current_iov;
735 		src_mbufs[crypto_index]->data_len = updated_length = crypto_len;
736 		/* TODO: Make this assignment conditional on QAT usage and add an assert. */
737 		src_mbufs[crypto_index]->buf_iova = spdk_vtophys((void *)current_iov, &updated_length);
738 		src_mbufs[crypto_index]->next = NULL;
739 		/* Store context in every mbuf as we don't know anything about completion order */
740 		src_mbufs[crypto_index]->userdata = bdev_io;
741 
742 		/* Set the IV - we use the LBA of the crypto_op */
743 		iv_ptr = rte_crypto_op_ctod_offset(crypto_ops[crypto_index], uint8_t *,
744 						   IV_OFFSET);
745 		memset(iv_ptr, 0, AES_CBC_IV_LENGTH);
746 		op_block_offset = bdev_io->u.bdev.offset_blocks + crypto_index;
747 		rte_memcpy(iv_ptr, &op_block_offset, sizeof(uint64_t));
748 
749 		/* Set the data to encrypt/decrypt length */
750 		crypto_ops[crypto_index]->sym->cipher.data.length = crypto_len;
751 		crypto_ops[crypto_index]->sym->cipher.data.offset = 0;
752 
753 		/* link the mbuf to the crypto op. */
754 		crypto_ops[crypto_index]->sym->m_src = src_mbufs[crypto_index];
755 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
756 			crypto_ops[crypto_index]->sym->m_dst = src_mbufs[crypto_index];
757 		} else {
758 			crypto_ops[crypto_index]->sym->m_dst = NULL;
759 		}
760 
761 		/* For encrypt, point the destination to a buffer we allocate and redirect the bdev_io
762 		 * that will be used to process the write on completion to the same buffer. Setting
763 		 * up the en_buffer is a little simpler as we know the destination buffer is single IOV.
764 		 */
765 		if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
766 
767 			/* Set the relevant destination en_mbuf elements. */
768 			dst_mbufs[crypto_index]->buf_addr = io_ctx->cry_iov.iov_base + en_offset;
769 			dst_mbufs[crypto_index]->data_len = updated_length = crypto_len;
770 			/* TODO: Make this assignment conditional on QAT usage and add an assert. */
771 			dst_mbufs[crypto_index]->buf_iova = spdk_vtophys(dst_mbufs[crypto_index]->buf_addr,
772 							    &updated_length);
773 			crypto_ops[crypto_index]->sym->m_dst = dst_mbufs[crypto_index];
774 			en_offset += crypto_len;
775 			dst_mbufs[crypto_index]->next = NULL;
776 
777 			/* Attach the crypto session to the operation */
778 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
779 							      io_ctx->crypto_bdev->session_encrypt);
780 			if (rc) {
781 				rc = -EINVAL;
782 				goto error_attach_session;
783 			}
784 
785 		} else {
786 			/* Attach the crypto session to the operation */
787 			rc = rte_crypto_op_attach_sym_session(crypto_ops[crypto_index],
788 							      io_ctx->crypto_bdev->session_decrypt);
789 			if (rc) {
790 				rc = -EINVAL;
791 				goto error_attach_session;
792 			}
793 
794 
795 		}
796 
797 		/* Subtract our running totals for the op in progress and the overall bdev io */
798 		total_remaining -= crypto_len;
799 		current_iov_remaining -= crypto_len;
800 
801 		/* move our current IOV pointer accordingly. */
802 		current_iov += crypto_len;
803 
804 		/* move on to the next crypto operation */
805 		crypto_index++;
806 
807 		/* If we're done with this IOV, move to the next one. */
808 		if (current_iov_remaining == 0 && total_remaining > 0) {
809 			iov_index++;
810 			current_iov = bdev_io->u.bdev.iovs[iov_index].iov_base;
811 			current_iov_remaining = bdev_io->u.bdev.iovs[iov_index].iov_len;
812 		}
813 	} while (total_remaining > 0);
814 
815 	/* Enqueue everything we've got but limit by the max number of descriptors we
816 	 * configured the crypto device for.
817 	 */
818 	burst = spdk_min(cryop_cnt, CRYPTO_QP_DESCRIPTORS);
819 	num_enqueued_ops = rte_cryptodev_enqueue_burst(cdev_id, crypto_ch->device_qp->qp,
820 			   &crypto_ops[0],
821 			   burst);
822 
823 	/* Add this bdev_io to our outstanding list if any of its crypto ops made it. */
824 	if (num_enqueued_ops > 0) {
825 		TAILQ_INSERT_TAIL(&crypto_ch->pending_cry_ios, bdev_io, module_link);
826 		io_ctx->on_pending_list = true;
827 	}
828 	/* We were unable to enqueue everything but did get some, so need to decide what
829 	 * to do based on the status of the last op.
830 	 */
831 	if (num_enqueued_ops < cryop_cnt) {
832 		switch (crypto_ops[num_enqueued_ops]->status) {
833 		case RTE_CRYPTO_OP_STATUS_NOT_PROCESSED:
834 			/* Queue them up on a linked list to be resubmitted via the poller. */
835 			for (crypto_index = num_enqueued_ops; crypto_index < cryop_cnt; crypto_index++) {
836 				op_to_queue = (struct vbdev_crypto_op *)rte_crypto_op_ctod_offset(crypto_ops[crypto_index],
837 						uint8_t *, QUEUED_OP_OFFSET);
838 				op_to_queue->cdev_id = cdev_id;
839 				op_to_queue->qp = crypto_ch->device_qp->qp;
840 				op_to_queue->crypto_op = crypto_ops[crypto_index];
841 				op_to_queue->bdev_io = bdev_io;
842 				TAILQ_INSERT_TAIL(&crypto_ch->queued_cry_ops,
843 						  op_to_queue,
844 						  link);
845 			}
846 			break;
847 		default:
848 			/* For all other statuses, set the io_ctx bdev_io status so that
849 			 * the poller will pick the failure up for the overall bdev status.
850 			 */
851 			io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
852 			if (num_enqueued_ops == 0) {
853 				/* If nothing was enqueued, but the last one wasn't because of
854 				 * busy, fail it now as the poller won't know anything about it.
855 				 */
856 				_crypto_operation_complete(bdev_io);
857 				rc = -EINVAL;
858 				goto error_attach_session;
859 			}
860 			break;
861 		}
862 	}
863 
864 	return rc;
865 
866 	/* Error cleanup paths. */
867 error_attach_session:
868 error_get_write_buffer:
869 	rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops, cryop_cnt);
870 	allocated = 0;
871 error_get_ops:
872 	if (crypto_op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
873 		spdk_mempool_put_bulk(g_mbuf_mp, (void **)&dst_mbufs[0],
874 				      cryop_cnt);
875 	}
876 	if (allocated > 0) {
877 		rte_mempool_put_bulk(g_crypto_op_mp, (void **)crypto_ops,
878 				     allocated);
879 	}
880 error_get_dst:
881 	spdk_mempool_put_bulk(g_mbuf_mp, (void **)&src_mbufs[0],
882 			      cryop_cnt);
883 	return rc;
884 }
885 
886 /* This function is called after all channels have been quiesced following
887  * a bdev reset.
888  */
889 static void
890 _ch_quiesce_done(struct spdk_io_channel_iter *i, int status)
891 {
892 	struct crypto_bdev_io *io_ctx = spdk_io_channel_iter_get_ctx(i);
893 
894 	assert(TAILQ_EMPTY(&io_ctx->crypto_ch->pending_cry_ios));
895 	assert(io_ctx->orig_io != NULL);
896 
897 	spdk_bdev_io_complete(io_ctx->orig_io, SPDK_BDEV_IO_STATUS_SUCCESS);
898 }
899 
900 /* This function is called per channel to quiesce IOs before completing a
901  * bdev reset that we received.
902  */
903 static void
904 _ch_quiesce(struct spdk_io_channel_iter *i)
905 {
906 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
907 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
908 
909 	crypto_ch->iter = i;
910 	/* When the poller runs, it will see the non-NULL iter and handle
911 	 * the quiesce.
912 	 */
913 }
914 
915 /* Completion callback for IO that were issued from this bdev other than read/write.
916  * They have their own for readability.
917  */
918 static void
919 _complete_internal_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
920 {
921 	struct spdk_bdev_io *orig_io = cb_arg;
922 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
923 
924 	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
925 		struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
926 
927 		assert(orig_io == orig_ctx->orig_io);
928 
929 		spdk_bdev_free_io(bdev_io);
930 
931 		spdk_for_each_channel(orig_ctx->crypto_bdev,
932 				      _ch_quiesce,
933 				      orig_ctx,
934 				      _ch_quiesce_done);
935 		return;
936 	}
937 
938 	spdk_bdev_io_complete(orig_io, status);
939 	spdk_bdev_free_io(bdev_io);
940 }
941 
942 /* Completion callback for writes that were issued from this bdev. */
943 static void
944 _complete_internal_write(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
945 {
946 	struct spdk_bdev_io *orig_io = cb_arg;
947 	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
948 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
949 
950 	spdk_free(orig_ctx->cry_iov.iov_base);
951 	spdk_bdev_io_complete(orig_io, status);
952 	spdk_bdev_free_io(bdev_io);
953 }
954 
955 /* Completion callback for reads that were issued from this bdev. */
956 static void
957 _complete_internal_read(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
958 {
959 	struct spdk_bdev_io *orig_io = cb_arg;
960 	struct crypto_bdev_io *orig_ctx = (struct crypto_bdev_io *)orig_io->driver_ctx;
961 
962 	if (success) {
963 
964 		/* Save off this bdev_io so it can be freed after decryption. */
965 		orig_ctx->read_io = bdev_io;
966 
967 		if (!_crypto_operation(orig_io, RTE_CRYPTO_CIPHER_OP_DECRYPT)) {
968 			return;
969 		} else {
970 			SPDK_ERRLOG("ERROR decrypting\n");
971 		}
972 	} else {
973 		SPDK_ERRLOG("ERROR on read prior to decrypting\n");
974 	}
975 
976 	spdk_bdev_io_complete(orig_io, SPDK_BDEV_IO_STATUS_FAILED);
977 	spdk_bdev_free_io(bdev_io);
978 }
979 
980 static void
981 vbdev_crypto_resubmit_io(void *arg)
982 {
983 	struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg;
984 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
985 
986 	vbdev_crypto_submit_request(io_ctx->ch, bdev_io);
987 }
988 
989 static void
990 vbdev_crypto_queue_io(struct spdk_bdev_io *bdev_io)
991 {
992 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
993 	int rc;
994 
995 	io_ctx->bdev_io_wait.bdev = bdev_io->bdev;
996 	io_ctx->bdev_io_wait.cb_fn = vbdev_crypto_resubmit_io;
997 	io_ctx->bdev_io_wait.cb_arg = bdev_io;
998 
999 	rc = spdk_bdev_queue_io_wait(bdev_io->bdev, io_ctx->ch, &io_ctx->bdev_io_wait);
1000 	if (rc != 0) {
1001 		SPDK_ERRLOG("Queue io failed in vbdev_crypto_queue_io, rc=%d.\n", rc);
1002 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1003 	}
1004 }
1005 
1006 /* Callback for getting a buf from the bdev pool in the event that the caller passed
1007  * in NULL, we need to own the buffer so it doesn't get freed by another vbdev module
1008  * beneath us before we're done with it.
1009  */
1010 static void
1011 crypto_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1012 		       bool success)
1013 {
1014 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1015 					   crypto_bdev);
1016 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1017 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1018 	int rc;
1019 
1020 	if (!success) {
1021 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1022 		return;
1023 	}
1024 
1025 	rc = spdk_bdev_readv_blocks(crypto_bdev->base_desc, crypto_ch->base_ch, bdev_io->u.bdev.iovs,
1026 				    bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks,
1027 				    bdev_io->u.bdev.num_blocks, _complete_internal_read,
1028 				    bdev_io);
1029 	if (rc != 0) {
1030 		if (rc == -ENOMEM) {
1031 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1032 			io_ctx->ch = ch;
1033 			vbdev_crypto_queue_io(bdev_io);
1034 		} else {
1035 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1036 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1037 		}
1038 	}
1039 }
1040 
1041 /* Called when someone submits IO to this crypto vbdev. For IO's not relevant to crypto,
1042  * we're simply passing it on here via SPDK IO calls which in turn allocate another bdev IO
1043  * and call our cpl callback provided below along with the original bdev_io so that we can
1044  * complete it once this IO completes. For crypto operations, we'll either encrypt it first
1045  * (writes) then call back into bdev to submit it or we'll submit a read and then catch it
1046  * on the way back for decryption.
1047  */
1048 static void
1049 vbdev_crypto_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
1050 {
1051 	struct vbdev_crypto *crypto_bdev = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_crypto,
1052 					   crypto_bdev);
1053 	struct crypto_io_channel *crypto_ch = spdk_io_channel_get_ctx(ch);
1054 	struct crypto_bdev_io *io_ctx = (struct crypto_bdev_io *)bdev_io->driver_ctx;
1055 	int rc = 0;
1056 
1057 	memset(io_ctx, 0, sizeof(struct crypto_bdev_io));
1058 	io_ctx->crypto_bdev = crypto_bdev;
1059 	io_ctx->crypto_ch = crypto_ch;
1060 	io_ctx->orig_io = bdev_io;
1061 	io_ctx->bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1062 
1063 	switch (bdev_io->type) {
1064 	case SPDK_BDEV_IO_TYPE_READ:
1065 		spdk_bdev_io_get_buf(bdev_io, crypto_read_get_buf_cb,
1066 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
1067 		break;
1068 	case SPDK_BDEV_IO_TYPE_WRITE:
1069 		rc = _crypto_operation(bdev_io, RTE_CRYPTO_CIPHER_OP_ENCRYPT);
1070 		break;
1071 	case SPDK_BDEV_IO_TYPE_UNMAP:
1072 		rc = spdk_bdev_unmap_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1073 					    bdev_io->u.bdev.offset_blocks,
1074 					    bdev_io->u.bdev.num_blocks,
1075 					    _complete_internal_io, bdev_io);
1076 		break;
1077 	case SPDK_BDEV_IO_TYPE_FLUSH:
1078 		rc = spdk_bdev_flush_blocks(crypto_bdev->base_desc, crypto_ch->base_ch,
1079 					    bdev_io->u.bdev.offset_blocks,
1080 					    bdev_io->u.bdev.num_blocks,
1081 					    _complete_internal_io, bdev_io);
1082 		break;
1083 	case SPDK_BDEV_IO_TYPE_RESET:
1084 		rc = spdk_bdev_reset(crypto_bdev->base_desc, crypto_ch->base_ch,
1085 				     _complete_internal_io, bdev_io);
1086 		break;
1087 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1088 	default:
1089 		SPDK_ERRLOG("crypto: unknown I/O type %d\n", bdev_io->type);
1090 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1091 		return;
1092 	}
1093 
1094 	if (rc != 0) {
1095 		if (rc == -ENOMEM) {
1096 			SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "No memory, queue the IO.\n");
1097 			io_ctx->ch = ch;
1098 			vbdev_crypto_queue_io(bdev_io);
1099 		} else {
1100 			SPDK_ERRLOG("ERROR on bdev_io submission!\n");
1101 			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
1102 		}
1103 	}
1104 }
1105 
1106 /* We'll just call the base bdev and let it answer except for WZ command which
1107  * we always say we don't support so that the bdev layer will actually send us
1108  * real writes that we can encrypt.
1109  */
1110 static bool
1111 vbdev_crypto_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1112 {
1113 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1114 
1115 	switch (io_type) {
1116 	case SPDK_BDEV_IO_TYPE_WRITE:
1117 	case SPDK_BDEV_IO_TYPE_UNMAP:
1118 	case SPDK_BDEV_IO_TYPE_RESET:
1119 	case SPDK_BDEV_IO_TYPE_READ:
1120 	case SPDK_BDEV_IO_TYPE_FLUSH:
1121 		return spdk_bdev_io_type_supported(crypto_bdev->base_bdev, io_type);
1122 	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
1123 	/* Force the bdev layer to issue actual writes of zeroes so we can
1124 	 * encrypt them as regular writes.
1125 	 */
1126 	default:
1127 		return false;
1128 	}
1129 }
1130 
1131 /* Callback for unregistering the IO device. */
1132 static void
1133 _device_unregister_cb(void *io_device)
1134 {
1135 	struct vbdev_crypto *crypto_bdev = io_device;
1136 
1137 	/* Done with this crypto_bdev. */
1138 	rte_cryptodev_sym_session_free(crypto_bdev->session_decrypt);
1139 	rte_cryptodev_sym_session_free(crypto_bdev->session_encrypt);
1140 	free(crypto_bdev->drv_name);
1141 	free(crypto_bdev->key);
1142 	free(crypto_bdev->crypto_bdev.name);
1143 	free(crypto_bdev);
1144 }
1145 
1146 /* Called after we've unregistered following a hot remove callback.
1147  * Our finish entry point will be called next.
1148  */
1149 static int
1150 vbdev_crypto_destruct(void *ctx)
1151 {
1152 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1153 
1154 	/* Remove this device from the internal list */
1155 	TAILQ_REMOVE(&g_vbdev_crypto, crypto_bdev, link);
1156 
1157 	/* Unclaim the underlying bdev. */
1158 	spdk_bdev_module_release_bdev(crypto_bdev->base_bdev);
1159 
1160 	/* Close the underlying bdev. */
1161 	spdk_bdev_close(crypto_bdev->base_desc);
1162 
1163 	/* Unregister the io_device. */
1164 	spdk_io_device_unregister(crypto_bdev, _device_unregister_cb);
1165 
1166 	g_number_of_claimed_volumes--;
1167 
1168 	return 0;
1169 }
1170 
/* get_io_channel entry point: how upper layers obtain a channel to this vbdev.
 * ctx is the context stored in the bdev at creation time, i.e. the address of
 * the vbdev_crypto node, which is also the io_device that was registered.
 */
static struct spdk_io_channel *
vbdev_crypto_get_io_channel(void *ctx)
{
	struct spdk_io_channel *ch;

	/* The channel code allocates the SPDK channel structure plus room for our
	 * crypto_io_channel (the size given at io_device registration) and then
	 * calls our channel create callback to fill it in.
	 */
	ch = spdk_get_io_channel(ctx);

	return ch;
}
1190 
1191 /* This is the output for bdev_get_bdevs() for this vbdev */
1192 static int
1193 vbdev_crypto_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1194 {
1195 	struct vbdev_crypto *crypto_bdev = (struct vbdev_crypto *)ctx;
1196 
1197 	spdk_json_write_name(w, "crypto");
1198 	spdk_json_write_object_begin(w);
1199 	spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1200 	spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1201 	spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1202 	spdk_json_write_named_string(w, "key", crypto_bdev->key);
1203 	spdk_json_write_object_end(w);
1204 	return 0;
1205 }
1206 
1207 static int
1208 vbdev_crypto_config_json(struct spdk_json_write_ctx *w)
1209 {
1210 	struct vbdev_crypto *crypto_bdev;
1211 
1212 	TAILQ_FOREACH(crypto_bdev, &g_vbdev_crypto, link) {
1213 		spdk_json_write_object_begin(w);
1214 		spdk_json_write_named_string(w, "method", "bdev_crypto_create");
1215 		spdk_json_write_named_object_begin(w, "params");
1216 		spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(crypto_bdev->base_bdev));
1217 		spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&crypto_bdev->crypto_bdev));
1218 		spdk_json_write_named_string(w, "crypto_pmd", crypto_bdev->drv_name);
1219 		spdk_json_write_named_string(w, "key", crypto_bdev->key);
1220 		spdk_json_write_object_end(w);
1221 		spdk_json_write_object_end(w);
1222 	}
1223 	return 0;
1224 }
1225 
1226 /* We provide this callback for the SPDK channel code to create a channel using
1227  * the channel struct we provided in our module get_io_channel() entry point. Here
1228  * we get and save off an underlying base channel of the device below us so that
1229  * we can communicate with the base bdev on a per channel basis. We also register the
1230  * poller used to complete crypto operations from the device.
1231  */
1232 static int
1233 crypto_bdev_ch_create_cb(void *io_device, void *ctx_buf)
1234 {
1235 	struct crypto_io_channel *crypto_ch = ctx_buf;
1236 	struct vbdev_crypto *crypto_bdev = io_device;
1237 	struct device_qp *device_qp;
1238 
1239 	crypto_ch->base_ch = spdk_bdev_get_io_channel(crypto_bdev->base_desc);
1240 	crypto_ch->poller = spdk_poller_register(crypto_dev_poller, crypto_ch, 0);
1241 	crypto_ch->device_qp = NULL;
1242 
1243 	pthread_mutex_lock(&g_device_qp_lock);
1244 	TAILQ_FOREACH(device_qp, &g_device_qp, link) {
1245 		if ((strcmp(device_qp->device->cdev_info.driver_name, crypto_bdev->drv_name) == 0) &&
1246 		    (device_qp->in_use == false)) {
1247 			crypto_ch->device_qp = device_qp;
1248 			device_qp->in_use = true;
1249 			break;
1250 		}
1251 	}
1252 	pthread_mutex_unlock(&g_device_qp_lock);
1253 	assert(crypto_ch->device_qp);
1254 
1255 	/* We use this queue to track outstanding IO in our layer. */
1256 	TAILQ_INIT(&crypto_ch->pending_cry_ios);
1257 
1258 	/* We use this to queue up crypto ops when the device is busy. */
1259 	TAILQ_INIT(&crypto_ch->queued_cry_ops);
1260 
1261 	return 0;
1262 }
1263 
1264 /* We provide this callback for the SPDK channel code to destroy a channel
1265  * created with our create callback. We just need to undo anything we did
1266  * when we created.
1267  */
1268 static void
1269 crypto_bdev_ch_destroy_cb(void *io_device, void *ctx_buf)
1270 {
1271 	struct crypto_io_channel *crypto_ch = ctx_buf;
1272 
1273 	pthread_mutex_lock(&g_device_qp_lock);
1274 	crypto_ch->device_qp->in_use = false;
1275 	pthread_mutex_unlock(&g_device_qp_lock);
1276 
1277 	spdk_poller_unregister(&crypto_ch->poller);
1278 	spdk_put_io_channel(crypto_ch->base_ch);
1279 }
1280 
1281 /* Create the association from the bdev and vbdev name and insert
1282  * on the global list. */
1283 static int
1284 vbdev_crypto_insert_name(const char *bdev_name, const char *vbdev_name,
1285 			 const char *crypto_pmd, const char *key)
1286 {
1287 	struct bdev_names *name;
1288 	int rc, j;
1289 	bool found = false;
1290 
1291 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1292 		if (strcmp(vbdev_name, name->vbdev_name) == 0) {
1293 			SPDK_ERRLOG("crypto bdev %s already exists\n", vbdev_name);
1294 			return -EEXIST;
1295 		}
1296 	}
1297 
1298 	name = calloc(1, sizeof(struct bdev_names));
1299 	if (!name) {
1300 		SPDK_ERRLOG("could not allocate bdev_names\n");
1301 		return -ENOMEM;
1302 	}
1303 
1304 	name->bdev_name = strdup(bdev_name);
1305 	if (!name->bdev_name) {
1306 		SPDK_ERRLOG("could not allocate name->bdev_name\n");
1307 		rc = -ENOMEM;
1308 		goto error_alloc_bname;
1309 	}
1310 
1311 	name->vbdev_name = strdup(vbdev_name);
1312 	if (!name->vbdev_name) {
1313 		SPDK_ERRLOG("could not allocate name->vbdev_name\n");
1314 		rc = -ENOMEM;
1315 		goto error_alloc_vname;
1316 	}
1317 
1318 	name->drv_name = strdup(crypto_pmd);
1319 	if (!name->drv_name) {
1320 		SPDK_ERRLOG("could not allocate name->drv_name\n");
1321 		rc = -ENOMEM;
1322 		goto error_alloc_dname;
1323 	}
1324 	for (j = 0; j < MAX_NUM_DRV_TYPES ; j++) {
1325 		if (strcmp(crypto_pmd, g_driver_names[j]) == 0) {
1326 			found = true;
1327 			break;
1328 		}
1329 	}
1330 	if (!found) {
1331 		SPDK_ERRLOG("invalid crypto PMD type %s\n", crypto_pmd);
1332 		rc = -EINVAL;
1333 		goto error_invalid_pmd;
1334 	}
1335 
1336 	name->key = strdup(key);
1337 	if (!name->key) {
1338 		SPDK_ERRLOG("could not allocate name->key\n");
1339 		rc = -ENOMEM;
1340 		goto error_alloc_key;
1341 	}
1342 	if (strlen(name->key) != AES_CBC_KEY_LENGTH) {
1343 		SPDK_ERRLOG("invalid AES_CCB key length\n");
1344 		rc = -EINVAL;
1345 		goto error_invalid_key;
1346 	}
1347 
1348 	TAILQ_INSERT_TAIL(&g_bdev_names, name, link);
1349 
1350 	return 0;
1351 
1352 	/* Error cleanup paths. */
1353 error_invalid_key:
1354 error_alloc_key:
1355 error_invalid_pmd:
1356 	free(name->drv_name);
1357 error_alloc_dname:
1358 	free(name->vbdev_name);
1359 error_alloc_vname:
1360 	free(name->bdev_name);
1361 error_alloc_bname:
1362 	free(name);
1363 	return rc;
1364 }
1365 
/* RPC entry point for crypto vbdev creation. Records the name association and,
 * if the base bdev already exists, claims it right away; otherwise creation is
 * deferred until the base bdev shows up.
 *
 * Returns 0 on success or deferral, otherwise the error returned by
 * vbdev_crypto_insert_name() or vbdev_crypto_claim().
 */
int
create_crypto_disk(const char *bdev_name, const char *vbdev_name,
		   const char *crypto_pmd, const char *key)
{
	struct spdk_bdev *base = spdk_bdev_get_by_name(bdev_name);
	int rc = vbdev_crypto_insert_name(bdev_name, vbdev_name, crypto_pmd, key);

	if (rc != 0) {
		return rc;
	}

	if (base == NULL) {
		SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n");
		return 0;
	}

	/* NOTE(review): if the claim fails, the name entry inserted above stays
	 * on the list - confirm that this is intended.
	 */
	return vbdev_crypto_claim(base);
}
1393 
/* Module init entry point. Fully initializes the crypto drivers and then reads
 * the "crypto" section of the config file, if there is one, recording a name
 * association for each "CRY" entry in preparation for the examine calls.
 *
 * Each CRY entry holds, in order: base bdev name, crypto vbdev name, key and
 * driver type. Returns 0 on success, -EINVAL if an entry is missing a value,
 * or the error from driver setup or vbdev_crypto_insert_name().
 */
static int
vbdev_crypto_init(void)
{
	struct spdk_conf_section *section;
	const char *base_name;
	const char *vbdev_name;
	const char *key;
	const char *pmd;
	int rc;
	int i;

	/* Fully configure both SW and HW drivers. */
	rc = vbdev_crypto_init_crypto_drivers();
	if (rc) {
		SPDK_ERRLOG("Error setting up crypto devices\n");
		return rc;
	}

	section = spdk_conf_find_section(NULL, "crypto");
	if (section == NULL) {
		/* Nothing configured through the config file. */
		return 0;
	}

	for (i = 0; spdk_conf_section_get_nval(section, "CRY", i) != NULL; i++) {
		base_name = spdk_conf_section_get_nmval(section, "CRY", i, 0);
		if (base_name == NULL) {
			SPDK_ERRLOG("crypto configuration missing bdev name\n");
			return -EINVAL;
		}

		vbdev_name = spdk_conf_section_get_nmval(section, "CRY", i, 1);
		if (vbdev_name == NULL) {
			SPDK_ERRLOG("crypto configuration missing crypto_bdev name\n");
			return -EINVAL;
		}

		key = spdk_conf_section_get_nmval(section, "CRY", i, 2);
		if (key == NULL) {
			SPDK_ERRLOG("crypto configuration missing crypto_bdev key\n");
			return -EINVAL;
		}
		SPDK_NOTICELOG("WARNING: You are storing your key in a plain text file!!\n");

		pmd = spdk_conf_section_get_nmval(section, "CRY", i, 3);
		if (pmd == NULL) {
			SPDK_ERRLOG("crypto configuration missing driver type\n");
			return -EINVAL;
		}

		rc = vbdev_crypto_insert_name(base_name, vbdev_name, pmd, key);
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}
1460 
1461 /* Called when the entire module is being torn down. */
1462 static void
1463 vbdev_crypto_finish(void)
1464 {
1465 	struct bdev_names *name;
1466 	struct vbdev_dev *device;
1467 	struct device_qp *dev_qp;
1468 	unsigned i;
1469 	int rc;
1470 
1471 	while ((name = TAILQ_FIRST(&g_bdev_names))) {
1472 		TAILQ_REMOVE(&g_bdev_names, name, link);
1473 		free(name->drv_name);
1474 		free(name->key);
1475 		free(name->bdev_name);
1476 		free(name->vbdev_name);
1477 		free(name);
1478 	}
1479 
1480 	while ((device = TAILQ_FIRST(&g_vbdev_devs))) {
1481 		struct rte_cryptodev *rte_dev;
1482 
1483 		TAILQ_REMOVE(&g_vbdev_devs, device, link);
1484 		rte_cryptodev_stop(device->cdev_id);
1485 
1486 		assert(device->cdev_id < RTE_CRYPTO_MAX_DEVS);
1487 		rte_dev = &rte_cryptodevs[device->cdev_id];
1488 
1489 		if (rte_dev->dev_ops->queue_pair_release != NULL) {
1490 			for (i = 0; i < device->cdev_info.max_nb_queue_pairs; i++) {
1491 				rte_dev->dev_ops->queue_pair_release(rte_dev, i);
1492 			}
1493 		}
1494 		free(device);
1495 	}
1496 	rc = rte_vdev_uninit(AESNI_MB);
1497 	if (rc) {
1498 		SPDK_ERRLOG("%d from rte_vdev_uninit\n", rc);
1499 	}
1500 
1501 	while ((dev_qp = TAILQ_FIRST(&g_device_qp))) {
1502 		TAILQ_REMOVE(&g_device_qp, dev_qp, link);
1503 		free(dev_qp);
1504 	}
1505 
1506 	rte_mempool_free(g_crypto_op_mp);
1507 	spdk_mempool_free(g_mbuf_mp);
1508 	rte_mempool_free(g_session_mp);
1509 	if (g_session_mp_priv != NULL) {
1510 		rte_mempool_free(g_session_mp_priv);
1511 	}
1512 }
1513 
1514 /* During init we'll be asked how much memory we'd like passed to us
1515  * in bev_io structures as context. Here's where we specify how
1516  * much context we want per IO.
1517  */
1518 static int
1519 vbdev_crypto_get_ctx_size(void)
1520 {
1521 	return sizeof(struct crypto_bdev_io);
1522 }
1523 
1524 /* Called when SPDK wants to save the current config of this vbdev module to
1525  * a file.
1526  */
1527 static void
1528 vbdev_crypto_get_spdk_running_config(FILE *fp)
1529 {
1530 	struct bdev_names *names = NULL;
1531 	fprintf(fp, "\n[crypto]\n");
1532 	TAILQ_FOREACH(names, &g_bdev_names, link) {
1533 		fprintf(fp, "  crypto %s %s ", names->bdev_name, names->vbdev_name);
1534 		fprintf(fp, "\n");
1535 	}
1536 
1537 	fprintf(fp, "\n");
1538 }
1539 
1540 /* Called when the underlying base bdev goes away. */
1541 static void
1542 vbdev_crypto_examine_hotremove_cb(void *ctx)
1543 {
1544 	struct vbdev_crypto *crypto_bdev, *tmp;
1545 	struct spdk_bdev *bdev_find = ctx;
1546 
1547 	TAILQ_FOREACH_SAFE(crypto_bdev, &g_vbdev_crypto, link, tmp) {
1548 		if (bdev_find == crypto_bdev->base_bdev) {
1549 			spdk_bdev_unregister(&crypto_bdev->crypto_bdev, NULL, NULL);
1550 		}
1551 	}
1552 }
1553 
/* write_config_json entry point of the function table. Intentionally empty:
 * this module writes its whole configuration from the module-level
 * config_json callback, so there is nothing to emit per bdev.
 */
static void
vbdev_crypto_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	/* No config per bdev needed */
}
1559 
1560 /* When we register our bdev this is how we specify our entry points. */
1561 static const struct spdk_bdev_fn_table vbdev_crypto_fn_table = {
1562 	.destruct		= vbdev_crypto_destruct,
1563 	.submit_request		= vbdev_crypto_submit_request,
1564 	.io_type_supported	= vbdev_crypto_io_type_supported,
1565 	.get_io_channel		= vbdev_crypto_get_io_channel,
1566 	.dump_info_json		= vbdev_crypto_dump_info_json,
1567 	.write_config_json	= vbdev_crypto_write_config_json
1568 };
1569 
1570 static struct spdk_bdev_module crypto_if = {
1571 	.name = "crypto",
1572 	.module_init = vbdev_crypto_init,
1573 	.config_text = vbdev_crypto_get_spdk_running_config,
1574 	.get_ctx_size = vbdev_crypto_get_ctx_size,
1575 	.examine_config = vbdev_crypto_examine,
1576 	.module_fini = vbdev_crypto_finish,
1577 	.config_json = vbdev_crypto_config_json
1578 };
1579 
1580 SPDK_BDEV_MODULE_REGISTER(crypto, &crypto_if)
1581 
1582 static int
1583 vbdev_crypto_claim(struct spdk_bdev *bdev)
1584 {
1585 	struct bdev_names *name;
1586 	struct vbdev_crypto *vbdev;
1587 	struct vbdev_dev *device;
1588 	bool found = false;
1589 	int rc = 0;
1590 
1591 	if (g_number_of_claimed_volumes >= MAX_CRYPTO_VOLUMES) {
1592 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Reached max number of claimed volumes\n");
1593 		rc = -EINVAL;
1594 		goto error_vbdev_alloc;
1595 	}
1596 	g_number_of_claimed_volumes++;
1597 
1598 	/* Check our list of names from config versus this bdev and if
1599 	 * there's a match, create the crypto_bdev & bdev accordingly.
1600 	 */
1601 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1602 		if (strcmp(name->bdev_name, bdev->name) != 0) {
1603 			continue;
1604 		}
1605 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "Match on %s\n", bdev->name);
1606 
1607 		vbdev = calloc(1, sizeof(struct vbdev_crypto));
1608 		if (!vbdev) {
1609 			SPDK_ERRLOG("could not allocate crypto_bdev\n");
1610 			rc = -ENOMEM;
1611 			goto error_vbdev_alloc;
1612 		}
1613 
1614 		/* The base bdev that we're attaching to. */
1615 		vbdev->base_bdev = bdev;
1616 		vbdev->crypto_bdev.name = strdup(name->vbdev_name);
1617 		if (!vbdev->crypto_bdev.name) {
1618 			SPDK_ERRLOG("could not allocate crypto_bdev name\n");
1619 			rc = -ENOMEM;
1620 			goto error_bdev_name;
1621 		}
1622 
1623 		vbdev->key = strdup(name->key);
1624 		if (!vbdev->key) {
1625 			SPDK_ERRLOG("could not allocate crypto_bdev key\n");
1626 			rc = -ENOMEM;
1627 			goto error_alloc_key;
1628 		}
1629 
1630 		vbdev->drv_name = strdup(name->drv_name);
1631 		if (!vbdev->drv_name) {
1632 			SPDK_ERRLOG("could not allocate crypto_bdev drv_name\n");
1633 			rc = -ENOMEM;
1634 			goto error_drv_name;
1635 		}
1636 
1637 		vbdev->crypto_bdev.product_name = "crypto";
1638 		vbdev->crypto_bdev.write_cache = bdev->write_cache;
1639 		if (strcmp(vbdev->drv_name, QAT) == 0) {
1640 			vbdev->crypto_bdev.required_alignment =
1641 				spdk_max(spdk_u32log2(bdev->blocklen), bdev->required_alignment);
1642 			SPDK_NOTICELOG("QAT in use: Required alignment set to %u\n",
1643 				       vbdev->crypto_bdev.required_alignment);
1644 		} else {
1645 			vbdev->crypto_bdev.required_alignment = bdev->required_alignment;
1646 		}
1647 		/* Note: CRYPTO_MAX_IO is in units of bytes, optimal_io_boundary is
1648 		 * in units of blocks.
1649 		 */
1650 		if (bdev->optimal_io_boundary > 0) {
1651 			vbdev->crypto_bdev.optimal_io_boundary =
1652 				spdk_min((CRYPTO_MAX_IO / bdev->blocklen), bdev->optimal_io_boundary);
1653 		} else {
1654 			vbdev->crypto_bdev.optimal_io_boundary = (CRYPTO_MAX_IO / bdev->blocklen);
1655 		}
1656 		vbdev->crypto_bdev.split_on_optimal_io_boundary = true;
1657 		vbdev->crypto_bdev.blocklen = bdev->blocklen;
1658 		vbdev->crypto_bdev.blockcnt = bdev->blockcnt;
1659 
1660 		/* This is the context that is passed to us when the bdev
1661 		 * layer calls in so we'll save our crypto_bdev node here.
1662 		 */
1663 		vbdev->crypto_bdev.ctxt = vbdev;
1664 		vbdev->crypto_bdev.fn_table = &vbdev_crypto_fn_table;
1665 		vbdev->crypto_bdev.module = &crypto_if;
1666 		TAILQ_INSERT_TAIL(&g_vbdev_crypto, vbdev, link);
1667 
1668 		spdk_io_device_register(vbdev, crypto_bdev_ch_create_cb, crypto_bdev_ch_destroy_cb,
1669 					sizeof(struct crypto_io_channel), vbdev->crypto_bdev.name);
1670 
1671 		rc = spdk_bdev_open(bdev, true, vbdev_crypto_examine_hotremove_cb,
1672 				    bdev, &vbdev->base_desc);
1673 		if (rc) {
1674 			SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
1675 			goto error_open;
1676 		}
1677 
1678 		rc = spdk_bdev_module_claim_bdev(bdev, vbdev->base_desc, vbdev->crypto_bdev.module);
1679 		if (rc) {
1680 			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev));
1681 			goto error_claim;
1682 		}
1683 
1684 		/* To init the session we have to get the cryptoDev device ID for this vbdev */
1685 		TAILQ_FOREACH(device, &g_vbdev_devs, link) {
1686 			if (strcmp(device->cdev_info.driver_name, vbdev->drv_name) == 0) {
1687 				found = true;
1688 				break;
1689 			}
1690 		}
1691 		if (found == false) {
1692 			SPDK_ERRLOG("ERROR can't match crypto device driver to crypto vbdev!\n");
1693 			rc = -EINVAL;
1694 			goto error_cant_find_devid;
1695 		}
1696 
1697 		/* Get sessions. */
1698 		vbdev->session_encrypt = rte_cryptodev_sym_session_create(g_session_mp);
1699 		if (NULL == vbdev->session_encrypt) {
1700 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1701 			rc = -EINVAL;
1702 			goto error_session_en_create;
1703 		}
1704 
1705 		vbdev->session_decrypt = rte_cryptodev_sym_session_create(g_session_mp);
1706 		if (NULL == vbdev->session_decrypt) {
1707 			SPDK_ERRLOG("ERROR trying to create crypto session!\n");
1708 			rc = -EINVAL;
1709 			goto error_session_de_create;
1710 		}
1711 
1712 		/* Init our per vbdev xform with the desired cipher options. */
1713 		vbdev->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
1714 		vbdev->cipher_xform.cipher.key.data = vbdev->key;
1715 		vbdev->cipher_xform.cipher.iv.offset = IV_OFFSET;
1716 		vbdev->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
1717 		vbdev->cipher_xform.cipher.key.length = AES_CBC_KEY_LENGTH;
1718 		vbdev->cipher_xform.cipher.iv.length = AES_CBC_IV_LENGTH;
1719 
1720 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
1721 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_encrypt,
1722 						    &vbdev->cipher_xform,
1723 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1724 		if (rc < 0) {
1725 			SPDK_ERRLOG("ERROR trying to init encrypt session!\n");
1726 			rc = -EINVAL;
1727 			goto error_session_init;
1728 		}
1729 
1730 		vbdev->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
1731 		rc = rte_cryptodev_sym_session_init(device->cdev_id, vbdev->session_decrypt,
1732 						    &vbdev->cipher_xform,
1733 						    g_session_mp_priv ? g_session_mp_priv : g_session_mp);
1734 		if (rc < 0) {
1735 			SPDK_ERRLOG("ERROR trying to init decrypt session!\n");
1736 			rc = -EINVAL;
1737 			goto error_session_init;
1738 		}
1739 
1740 		rc = spdk_bdev_register(&vbdev->crypto_bdev);
1741 		if (rc < 0) {
1742 			SPDK_ERRLOG("ERROR trying to register bdev\n");
1743 			rc = -EINVAL;
1744 			goto error_bdev_register;
1745 		}
1746 		SPDK_DEBUGLOG(SPDK_LOG_CRYPTO, "registered io_device and virtual bdev for: %s\n",
1747 			      name->vbdev_name);
1748 		break;
1749 	}
1750 
1751 	return rc;
1752 
1753 	/* Error cleanup paths. */
1754 error_bdev_register:
1755 error_session_init:
1756 	rte_cryptodev_sym_session_free(vbdev->session_decrypt);
1757 error_session_de_create:
1758 	rte_cryptodev_sym_session_free(vbdev->session_encrypt);
1759 error_session_en_create:
1760 error_cant_find_devid:
1761 error_claim:
1762 	spdk_bdev_close(vbdev->base_desc);
1763 error_open:
1764 	TAILQ_REMOVE(&g_vbdev_crypto, vbdev, link);
1765 	spdk_io_device_unregister(vbdev, NULL);
1766 	free(vbdev->drv_name);
1767 error_drv_name:
1768 	free(vbdev->key);
1769 error_alloc_key:
1770 	free(vbdev->crypto_bdev.name);
1771 error_bdev_name:
1772 	free(vbdev);
1773 error_vbdev_alloc:
1774 	g_number_of_claimed_volumes--;
1775 	return rc;
1776 }
1777 
1778 /* RPC entry for deleting a crypto vbdev. */
1779 void
1780 delete_crypto_disk(struct spdk_bdev *bdev, spdk_delete_crypto_complete cb_fn,
1781 		   void *cb_arg)
1782 {
1783 	struct bdev_names *name;
1784 
1785 	if (!bdev || bdev->module != &crypto_if) {
1786 		cb_fn(cb_arg, -ENODEV);
1787 		return;
1788 	}
1789 
1790 	/* Remove the association (vbdev, bdev) from g_bdev_names. This is required so that the
1791 	 * vbdev does not get re-created if the same bdev is constructed at some other time,
1792 	 * unless the underlying bdev was hot-removed.
1793 	 */
1794 	TAILQ_FOREACH(name, &g_bdev_names, link) {
1795 		if (strcmp(name->vbdev_name, bdev->name) == 0) {
1796 			TAILQ_REMOVE(&g_bdev_names, name, link);
1797 			free(name->bdev_name);
1798 			free(name->vbdev_name);
1799 			free(name->drv_name);
1800 			free(name->key);
1801 			free(name);
1802 			break;
1803 		}
1804 	}
1805 
1806 	/* Additional cleanup happens in the destruct callback. */
1807 	spdk_bdev_unregister(bdev, cb_fn, cb_arg);
1808 }
1809 
1810 /* Because we specified this function in our crypto bdev function table when we
1811  * registered our crypto bdev, we'll get this call anytime a new bdev shows up.
1812  * Here we need to decide if we care about it and if so what to do. We
1813  * parsed the config file at init so we check the new bdev against the list
1814  * we built up at that time and if the user configured us to attach to this
1815  * bdev, here's where we do it.
1816  */
1817 static void
1818 vbdev_crypto_examine(struct spdk_bdev *bdev)
1819 {
1820 	vbdev_crypto_claim(bdev);
1821 	spdk_bdev_module_examine_done(&crypto_if);
1822 }
1823 
/* Register the "vbdev_crypto" log component under the SPDK_LOG_CRYPTO flag,
 * the same flag this file passes to SPDK_DEBUGLOG.
 */
1824 SPDK_LOG_REGISTER_COMPONENT("vbdev_crypto", SPDK_LOG_CRYPTO)
1825