/* xref: /spdk/include/spdk_internal/mlx5.h (revision d1c46ed8e5f61500a9ef69d922f8d3f89a4e9cb3) */
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES.
3  *   All rights reserved.
4  */
5 
6 #ifndef SPDK_MLX5_H
7 #define SPDK_MLX5_H
8 
9 #include <infiniband/mlx5dv.h>
10 
11 #include "spdk/tree.h"
12 
13 #define SPDK_MLX5_DEV_MAX_NAME_LEN 64
14 
15 /* API for low level PRM based mlx5 driver implementation. Some terminology:
16  * PRM - Programming Reference Manual
17  * QP - Queue Pair
18  * SQ - Submission Queue
19  * CQ - Completion Queue
20  * WQE - Work Queue Element
21  * WQEBB - Work Queue Element Build Block (64 bytes)
22  * CQE - Completion Queue Entry
23  * BSF - Byte Stream Format - part of UMR WQ which describes specific data properties such as encryption or signature
24  * UMR - User Memory Region
25  * DEK - Data Encryption Key
26  */
27 
28 #define SPDK_MLX5_VENDOR_ID_MELLANOX 0x2c9
29 
30 struct spdk_mlx5_crypto_dek_legacy;
31 struct spdk_mlx5_crypto_keytag;
32 
33 enum {
34 	/** Error Completion Event - generate CQE on error for every CTRL segment, even one without CQ_UPDATE bit.
35 	 * Don't generate CQE in other cases. Default behaviour */
36 	SPDK_MLX5_WQE_CTRL_CE_CQ_ECE			= 3 << 2,
37 	/** Do not generate IBV_WC_WR_FLUSH_ERR for non-signaled CTRL segments. Completions are generated only for
38 	 * signaled (CQ_UPDATE) CTRL segments and the first error */
39 	SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR		= 1 << 2,
40 	/** Always generate CQE for CTRL segment WQE */
41 	SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE			= MLX5_WQE_CTRL_CQ_UPDATE,
42 	SPDK_MLX5_WQE_CTRL_CE_MASK			= 3 << 2,
43 	SPDK_MLX5_WQE_CTRL_SOLICITED			= MLX5_WQE_CTRL_SOLICITED,
44 	/** WQE starts execution only after all previous Read/Atomic WQEs complete */
45 	SPDK_MLX5_WQE_CTRL_FENCE			= MLX5_WQE_CTRL_FENCE,
46 	/** WQE starts execution after all local WQEs (memory operation, gather) complete */
47 	SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE	= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE,
48 	/** WQE starts execution only after all previous WQEs complete */
49 	SPDK_MLX5_WQE_CTRL_STRONG_ORDERING		= 3 << 5,
50 };
51 
52 struct spdk_mlx5_crypto_dek_create_attr {
53 	/* Data Encryption Key in binary form */
54 	char *dek;
55 	/* Length of the dek */
56 	size_t dek_len;
57 };
58 
59 struct spdk_mlx5_cq;
60 struct spdk_mlx5_qp;
61 
62 struct spdk_mlx5_cq_attr {
63 	uint32_t cqe_cnt;
64 	uint32_t cqe_size;
65 	void *cq_context;
66 	struct ibv_comp_channel *comp_channel;
67 	int comp_vector;
68 };
69 
70 struct spdk_mlx5_qp_attr {
71 	struct ibv_qp_cap cap;
72 	bool sigall;
73 	/* If set then CQ_UPDATE will be cleared for every ctrl WQE and only last ctrl WQE before ringing the doorbell
74 	 * will be updated with CQ_UPDATE flag */
75 	bool siglast;
76 };
77 
78 struct spdk_mlx5_cq_completion {
79 	union {
80 		uint64_t wr_id;
81 		uint32_t mkey; /* applicable if status == MLX5_CQE_SYNDROME_SIGERR */
82 	};
83 	int status;
84 };
85 
86 struct spdk_mlx5_mkey_pool;
87 
88 enum spdk_mlx5_mkey_pool_flags {
89 	SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO = 1 << 0,
90 	SPDK_MLX5_MKEY_POOL_FLAG_SIGNATURE = 1 << 1,
91 	/* Max number of pools of different types */
92 	SPDK_MLX5_MKEY_POOL_FLAG_COUNT = 2,
93 };
94 
95 struct spdk_mlx5_mkey_pool_param {
96 	uint32_t mkey_count;
97 	uint32_t cache_per_thread;
98 	/* enum spdk_mlx5_mkey_pool_flags */
99 	uint32_t flags;
100 };
101 
102 struct spdk_mlx5_mkey_pool_obj {
103 	uint32_t mkey;
104 	/* Determines which pool the mkey belongs to. See \ref spdk_mlx5_mkey_pool_flags */
105 	uint8_t pool_flag;
106 	RB_ENTRY(spdk_mlx5_mkey_pool_obj) node;
107 	struct {
108 		uint32_t sigerr_count;
109 		bool sigerr;
110 	} sig;
111 };
112 
113 struct spdk_mlx5_umr_attr {
114 	struct ibv_sge *sge;
115 	uint32_t mkey; /* User Memory Region key to configure */
116 	uint32_t umr_len;
117 	uint16_t sge_count;
118 };
119 
120 enum spdk_mlx5_encryption_order {
121 	SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_WIRE_SIGNATURE    = 0x0,
122 	SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_MEMORY_SIGNATURE  = 0x1,
123 	SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_WIRE          = 0x2,
124 	SPDK_MLX5_ENCRYPTION_ORDER_ENCRYPTED_RAW_MEMORY        = 0x3,
125 };
126 
127 enum spdk_mlx5_block_size_selector {
128 	SPDK_MLX5_BLOCK_SIZE_SELECTOR_RESERVED	= 0,
129 	SPDK_MLX5_BLOCK_SIZE_SELECTOR_512	= 1,
130 	SPDK_MLX5_BLOCK_SIZE_SELECTOR_520	= 2,
131 	SPDK_MLX5_BLOCK_SIZE_SELECTOR_4096	= 3,
132 	SPDK_MLX5_BLOCK_SIZE_SELECTOR_4160	= 4,
133 };
134 
135 enum spdk_mlx5_crypto_key_tweak_mode {
136 	SPDK_MLX5_CRYPTO_KEY_TWEAK_MODE_SIMPLE_LBA_BE	= 0,
137 	SPDK_MLX5_CRYPTO_KEY_TWEAK_MODE_SIMPLE_LBA_LE	= 1,
138 };
139 
140 struct spdk_mlx5_crypto_dek_data {
141 	/** low level devx obj id which represents the DEK */
142 	uint32_t dek_obj_id;
143 	/** Crypto key tweak mode */
144 	enum spdk_mlx5_crypto_key_tweak_mode tweak_mode;
145 };
146 
147 struct spdk_mlx5_umr_crypto_attr {
148 	uint8_t enc_order; /* see \ref enum spdk_mlx5_encryption_order */
149 	uint8_t bs_selector; /* see \ref enum spdk_mlx5_block_size_selector */
150 	uint8_t tweak_mode; /* see \ref enum spdk_mlx5_crypto_key_tweak_mode */
151 	/* Low level ID of the Data Encryption Key */
152 	uint32_t dek_obj_id;
153 	uint64_t xts_iv;
154 	uint64_t keytag; /* Must match DEK's keytag or 0 */
155 };
156 
157 /* Persistent Signature Value (PSV) is used to contain a calculated signature value for a single signature
158  * along with some meta-data, such as error flags and status flags */
159 struct spdk_mlx5_psv {
160 	struct mlx5dv_devx_obj *devx_obj;
161 	uint32_t index;
162 };
163 
164 enum spdk_mlx5_umr_sig_domain {
165 	SPDK_MLX5_UMR_SIG_DOMAIN_MEMORY,
166 	SPDK_MLX5_UMR_SIG_DOMAIN_WIRE
167 };
168 
169 struct spdk_mlx5_umr_sig_attr {
170 	uint32_t seed;
171 	/* Index of the PSV used by this UMR */
172 	uint32_t psv_index;
173 	enum spdk_mlx5_umr_sig_domain domain;
174 	/* Number of sigerr completions received on the UMR */
175 	uint32_t sigerr_count;
176 	/* Number of bytes covered by this UMR */
177 	uint32_t raw_data_size;
178 	bool init; /* Set to true on the first UMR to initialize signature with its default values */
179 	bool check_gen; /* Set to true for the last UMR to generate signature */
180 };
181 
182 struct spdk_mlx5_device_crypto_caps {
183 	bool wrapped_crypto_operational;
184 	bool wrapped_crypto_going_to_commissioning;
185 	bool wrapped_import_method_aes_xts;
186 	bool single_block_le_tweak;
187 	bool multi_block_be_tweak;
188 	bool multi_block_le_tweak;
189 };
190 
191 struct spdk_mlx5_device_caps {
192 	/* Content of this structure is valid only if crypto_supported is true */
193 	struct spdk_mlx5_device_crypto_caps crypto;
194 	bool crypto_supported;
195 	bool crc32c_supported;
196 };
197 
198 /**
199  * Query device capabilities
200  *
201  * \param context Context of a device to query
202  * \param caps Device capabilities
203  * \return 0 on success, negated errno on failure.
204  */
205 int spdk_mlx5_device_query_caps(struct ibv_context *context, struct spdk_mlx5_device_caps *caps);
206 
207 /**
208  * Create Completion Queue
209  *
210  * \note: CQ and all associated qpairs must be accessed in scope of a single thread
211  * \note: CQ size must be enough to hold completions of all connected qpairs
212  *
213  * \param pd Protection Domain
214  * \param cq_attr Attributes to be used to create CQ
 * \param cq_out Output parameter, set to the created CQ
216  * \return 0 on success, negated errno on failure. \b cq_out is set only on success result
217  */
218 int spdk_mlx5_cq_create(struct ibv_pd *pd, struct spdk_mlx5_cq_attr *cq_attr,
219 			struct spdk_mlx5_cq **cq_out);
220 
221 /**
222  * Destroy Completion Queue
223  *
 * \param cq CQ created with \ref spdk_mlx5_cq_create
 * \return 0 on success, negated errno on failure
 */
226 int spdk_mlx5_cq_destroy(struct spdk_mlx5_cq *cq);
227 
228 /**
229  * Create loopback qpair suitable for RDMA operations
230  *
231  * \param pd Protection Domain
232  * \param cq Completion Queue to bind QP to
233  * \param qp_attr Attributes to be used to create QP
 * \param qp_out Output parameter, set to the created QP
235  * \return 0 on success, negated errno on failure. \b qp_out is set only on success result
236  */
237 int spdk_mlx5_qp_create(struct ibv_pd *pd, struct spdk_mlx5_cq *cq,
238 			struct spdk_mlx5_qp_attr *qp_attr, struct spdk_mlx5_qp **qp_out);
239 
240 /**
241  * Changes internal qpair state to error causing all unprocessed Work Requests to be completed with IBV_WC_WR_FLUSH_ERR
242  * status code.
243  *
244  * \param qp qpair pointer
245  * \return 0 on success, negated errno on failure
246  */
247 int spdk_mlx5_qp_set_error_state(struct spdk_mlx5_qp *qp);
248 
249 /**
250  * Get original verbs qp
251  *
252  * \param qp mlx5 qp
253  * \return Pointer to the underlying ibv_verbs qp
254  */
255 struct ibv_qp *spdk_mlx5_qp_get_verbs_qp(struct spdk_mlx5_qp *qp);
256 
257 /**
258  * Destroy qpair
259  *
260  * \param qp QP created with \ref spdk_mlx5_qp_create
261  */
262 void spdk_mlx5_qp_destroy(struct spdk_mlx5_qp *qp);
263 
264 /**
265  * Poll Completion Queue, save up to \b max_completions into \b comp array
266  *
267  * \param cq Completion Queue
268  * \param comp Array of completions to be filled by this function
 * \param max_completions Maximum number of entries to store in \b comp
270  * \return 0 on success, negated errno on failure
271  */
272 int spdk_mlx5_cq_poll_completions(struct spdk_mlx5_cq *cq,
273 				  struct spdk_mlx5_cq_completion *comp, int max_completions);
274 
275 /**
276  * Ring Send Queue doorbell, submits all previously posted WQEs to HW
277  *
278  * \param qp qpair pointer
279  */
280 void spdk_mlx5_qp_complete_send(struct spdk_mlx5_qp *qp);
281 
282 /**
283  * Submit RDMA_WRITE operations on the qpair
284  *
285  * \param qp qpair pointer
286  * \param sge Memory layout of the local data to be written
287  * \param sge_count Number of \b sge entries
288  * \param dstaddr Remote address to write \b sge to
289  * \param rkey Remote memory key
290  * \param wrid wrid which is returned in the CQE
291  * \param flags SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE to have a signaled completion; Any of SPDK_MLX5_WQE_CTRL_FENCE* or 0
292  * \return 0 on success, negated errno on failure
293  */
294 int spdk_mlx5_qp_rdma_write(struct spdk_mlx5_qp *qp, struct ibv_sge *sge, uint32_t sge_count,
295 			    uint64_t dstaddr, uint32_t rkey, uint64_t wrid, uint32_t flags);
296 
297 /**
 * Submit RDMA_READ operations on the qpair
 *
 * \param qp qpair pointer
 * \param sge Memory layout of the local buffers for reading remote data
 * \param sge_count Number of \b sge entries
 * \param dstaddr Remote address to read from into \b sge
304  * \param rkey Remote memory key
305  * \param wrid wrid which is returned in the CQE
306  * \param flags SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE to have a signaled completion; Any of SPDK_MLX5_WQE_CTRL_FENCE* or 0
307  * \return 0 on success, negated errno on failure
308  */
309 int spdk_mlx5_qp_rdma_read(struct spdk_mlx5_qp *qp, struct ibv_sge *sge, uint32_t sge_count,
310 			   uint64_t dstaddr, uint32_t rkey, uint64_t wrid, uint32_t flags);
311 
312 /**
313  * Configure User Memory Region obtained using \ref spdk_mlx5_mkey_pool_get_bulk with crypto capabilities.
314  *
315  * Besides crypto capabilities, it allows to gather memory chunks into virtually contig (from the NIC point of view)
316  * memory space with start address 0. The user must ensure that \b qp's capacity is enough to perform this operation.
317  * It only works if the UMR pool was created with crypto capabilities.
318  *
319  * \param qp Qpair to be used for UMR configuration. If RDMA operation which references this UMR is used on the same \b qp
320  * then it is not necessary to wait for the UMR configuration to complete. Instead, first RDMA operation after UMR
321  * configuration must have flag SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE set to 1
322  * \param umr_attr Common UMR attributes, describe memory layout
323  * \param crypto_attr Crypto UMR attributes
324  * \param wr_id wrid which is returned in the CQE
325  * \param flags SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE to have a signaled completion; Any of SPDK_MLX5_WQE_CTRL_FENCE* or 0
326  * \return 0 on success, negated errno on failure
327  */
328 int spdk_mlx5_umr_configure_crypto(struct spdk_mlx5_qp *qp, struct spdk_mlx5_umr_attr *umr_attr,
329 				   struct spdk_mlx5_umr_crypto_attr *crypto_attr, uint64_t wr_id, uint32_t flags);
330 
331 /**
332  * Create a PSV to be used for signature operations
333  *
334  * \param pd Protection Domain PSV belongs to
 * \return Pointer to the created PSV on success or NULL on failure
336  */
337 struct spdk_mlx5_psv *spdk_mlx5_create_psv(struct ibv_pd *pd);
338 
339 /**
340  * Destroy PSV
341  *
342  * \param psv PSV pointer
343  * \return 0 on success, negated errno on failure
344  */
345 int spdk_mlx5_destroy_psv(struct spdk_mlx5_psv *psv);
346 
347 /**
 * Once a signature error happens on PSV, its state can be re-initialized via a special SET_PSV WQE
349  *
350  * \param qp qp to be used to re-initialize PSV after error
351  * \param psv_index index of the PSV object
352  * \param crc_seed CRC32C seed to be used for initialization
 * \param wr_id wrid which is returned in the CQE
354  * \param flags SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE to have a signaled completion or 0
355  * \return 0 on success, negated errno on failure
356  */
357 int spdk_mlx5_qp_set_psv(struct spdk_mlx5_qp *qp, uint32_t psv_index, uint32_t crc_seed,
358 			 uint64_t wr_id, uint32_t flags);
359 
360 /**
361  * Configure User Memory Region obtained using \ref spdk_mlx5_mkey_pool_get_bulk with CRC32C capabilities.
362  *
363  * Besides signature capabilities, it allows to gather memory chunks into virtually contig (from the NIC point of view)
364  * memory space with start address 0. The user must ensure that \b qp's capacity is enough to perform this operation.
365  *
366  * \param qp Qpair to be used for UMR configuration. If RDMA operation which references this UMR is used on the same \b qp
367  * then it is not necessary to wait for the UMR configuration to complete. Instead, first RDMA operation after UMR
368  * configuration must have flag SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE set to 1
369  * \param umr_attr Common UMR attributes, describe memory layout
370  * \param sig_attr Signature UMR attributes
371  * \param wr_id wrid which is returned in the CQE
372  * \param flags SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE to have a signaled completion; Any of SPDK_MLX5_WQE_CTRL_FENCE* or 0
373  * \return 0 on success, negated errno on failure
374  */
375 int spdk_mlx5_umr_configure_sig(struct spdk_mlx5_qp *qp, struct spdk_mlx5_umr_attr *umr_attr,
376 				struct spdk_mlx5_umr_sig_attr *sig_attr, uint64_t wr_id, uint32_t flags);
377 
378 /**
379  * Return a NULL terminated array of devices which support crypto operation on Nvidia NICs
380  *
381  * \param dev_num The size of the array or 0
382  * \return Array of contexts. This array must be released with \b spdk_mlx5_crypto_devs_release
383  */
384 struct ibv_context **spdk_mlx5_crypto_devs_get(int *dev_num);
385 
386 /**
387  * Releases array of devices allocated by \b spdk_mlx5_crypto_devs_get
388  *
389  * \param rdma_devs Array of device to be released
390  */
391 void spdk_mlx5_crypto_devs_release(struct ibv_context **rdma_devs);
392 
393 /**
394  * Create a keytag which contains DEKs per each crypto device in the system
395  *
396  * \param attr Crypto attributes
397  * \param out Keytag
398  * \return 0 on success, negated errno of failure
399  */
400 int spdk_mlx5_crypto_keytag_create(struct spdk_mlx5_crypto_dek_create_attr *attr,
401 				   struct spdk_mlx5_crypto_keytag **out);
402 
403 /**
404  * Destroy a keytag created using \b spdk_mlx5_crypto_keytag_create
405  *
406  * \param keytag Keytag pointer
407  */
408 void spdk_mlx5_crypto_keytag_destroy(struct spdk_mlx5_crypto_keytag *keytag);
409 
410 /**
411  * Get Data Encryption Key data
412  *
413  * \param keytag Keytag with DEKs
414  * \param pd Protection Domain which is going to be used to register UMR.
 * \param data DEK data (including the low level DEK obj id usable to configure a crypto UMR)
 *             to be filled by this function
417  * \return 0 on success, negated errno on failure
418  */
419 int spdk_mlx5_crypto_get_dek_data(struct spdk_mlx5_crypto_keytag *keytag, struct ibv_pd *pd,
420 				  struct spdk_mlx5_crypto_dek_data *data);
421 
422 /**
423  * Specify which devices are allowed to be used for crypto operation.
424  *
425  * If the user doesn't call this function then all devices which support crypto will be used.
426  * This function copies devices names. In order to free allocated memory, the user must call
427  * this function with either NULL \b dev_names or with \b devs_count equal 0. This way can also
428  * be used to allow all devices.
429  *
 * Subsequent calls with non-NULL \b dev_names and non-zero \b devs_count overwrite the currently copied dev_names array.
431  *
432  * This function is not thread safe.
433  *
434  * \param dev_names Array of devices names which are allowed to be used for crypto operations
 * \param devs_count Size of \b dev_names array
436  * \return 0 on success, negated errno on failure
437  */
438 int spdk_mlx5_crypto_devs_allow(const char *const dev_names[], size_t devs_count);
439 
440 /**
441  * Creates a pool of memory keys for a given \b PD. If params::flags has SPDK_MLX5_MKEY_POOL_FLAG_CRYPTO enabled,
442  * then a device associated with PD must support crypto operations.
443  *
444  * Can be called several times for different PDs. Has no effect if a pool for \b PD with the same \b flags already exists
445  *
446  * \param params Parameter of the memory pool
447  * \param pd Protection Domain
448  * \return 0 on success, errno on failure
449  */
450 int spdk_mlx5_mkey_pool_init(struct spdk_mlx5_mkey_pool_param *params, struct ibv_pd *pd);
451 
452 /**
453  * Destroy mkey pools with the given \b flags and \b pd which was created by \ref spdk_mlx5_mkey_pool_init.
454  *
455  * The pool reference must be released by \ref spdk_mlx5_mkey_pool_put_ref before calling this function.
456  *
457  * \param pd Protection Domain
458  * \param flags Specifies type of the pool to delete.
459  * \return 0 on success, negated errno on failure
460  */
461 int spdk_mlx5_mkey_pool_destroy(uint32_t flags, struct ibv_pd *pd);
462 
463 /**
464  * Get a reference to mkey pool specified by PD, increment internal reference counter.
465  *
466  * \param pd PD to get a mkey pool for
467  * \param flags Required mkey pool flags, see \ref enum spdk_mlx5_mkey_pool_flags
468  * \return Pointer to the mkey pool on success or NULL on error
469  */
470 struct spdk_mlx5_mkey_pool *spdk_mlx5_mkey_pool_get_ref(struct ibv_pd *pd, uint32_t flags);
471 
472 /**
473  * Put the mkey pool reference.
474  *
 * The pool is NOT destroyed even if the reference counter reaches 0
476  *
477  * \param pool Mkey pool pointer
478  */
479 void spdk_mlx5_mkey_pool_put_ref(struct spdk_mlx5_mkey_pool *pool);
480 
481 /**
482  * Get several mkeys from the pool
483  *
484  * \param pool mkey pool
485  * \param mkeys array of mkey pointers to be filled by this function
486  * \param mkeys_count number of mkeys to get from the pool
487  * \return 0 on success, errno on failure
488  */
489 int spdk_mlx5_mkey_pool_get_bulk(struct spdk_mlx5_mkey_pool *pool,
490 				 struct spdk_mlx5_mkey_pool_obj **mkeys, uint32_t mkeys_count);
491 
492 /**
493  * Return mkeys to the pool
494  *
495  * \param pool mkey pool
496  * \param mkeys array of mkey pointers to be returned to the pool
497  * \param mkeys_count number of mkeys to be returned to the pool
498  */
499 void spdk_mlx5_mkey_pool_put_bulk(struct spdk_mlx5_mkey_pool *pool,
500 				  struct spdk_mlx5_mkey_pool_obj **mkeys, uint32_t mkeys_count);
501 
502 /**
503  * Notify the mlx5 library that a module which can handle UMR configuration is registered or unregistered
504  *
505  * \param registered True if the module is registered, false otherwise
506  */
507 void spdk_mlx5_umr_implementer_register(bool registered);
508 
509 /**
510  * Check whether a module which can handle UMR configuration is registered or not
511  *
 * \return True if the UMR implementer is registered, false otherwise
513  */
514 bool spdk_mlx5_umr_implementer_is_registered(void);
515 
516 #endif /* SPDK_MLX5_H */
517