/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2018 Intel Corporation. All rights reserved.
 *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#ifndef SPDK_INTERNAL_NVME_TCP_H
#define SPDK_INTERNAL_NVME_TCP_H

#include "spdk/likely.h"
#include "spdk/sock.h"
#include "spdk/dif.h"
#include "spdk/hexlify.h"
#include "spdk/nvmf_spec.h"
#include "spdk/util.h"
#include "spdk/base64.h"

#include "sgl.h"

#include "openssl/evp.h"
#include "openssl/kdf.h"
#include "openssl/sha.h"

#define SPDK_CRC32C_XOR				0xffffffffUL
#define SPDK_NVME_TCP_DIGEST_LEN		4
#define SPDK_NVME_TCP_DIGEST_ALIGNMENT		4
#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT	30
#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR	8
#define SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE	8192u
/*
 * Maximum number of SGL elements.
 */
#define NVME_TCP_MAX_SGL_DESCRIPTORS	(16)

#define MAKE_DIGEST_WORD(BUF, CRC32C) \
        (   ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
            ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
            ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
            ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))

#define MATCH_DIGEST_WORD(BUF, CRC32C) \
        (    ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0)         \
            | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8)         \
            | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16)        \
            | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24))       \
            == (CRC32C))

#define DGET32(B)                                                               \
        (((  (uint32_t) *((uint8_t *)(B)+0)) << 0)                              \
         | (((uint32_t) *((uint8_t *)(B)+1)) << 8)                              \
         | (((uint32_t) *((uint8_t *)(B)+2)) << 16)                             \
         | (((uint32_t) *((uint8_t *)(B)+3)) << 24))

#define DSET32(B,D)                                                             \
        (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)),               \
         ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)),               \
         ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)),              \
         ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))

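/*
 * Illustrative example (not part of the original header): DSET32/DGET32 store
 * and load a 32-bit value in little-endian byte order regardless of host
 * endianness, so a value round-trips through a byte buffer unchanged:
 *
 *	uint8_t buf[4];
 *
 *	DSET32(buf, 0x12345678);
 *	assert(DGET32(buf) == 0x12345678);	// buf = {0x78, 0x56, 0x34, 0x12}
 */
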
/* The PSK identity comprises the following components:
 * 4-character format specifier "NVMe" +
 * 1-character TLS protocol version indicator +
 * 1-character PSK type indicator, specifying the type of PSK in use +
 * 2-character hash specifier +
 * NQN of the host (SPDK_NVMF_NQN_MAX_LEN -> 223) +
 * NQN of the subsystem (SPDK_NVMF_NQN_MAX_LEN -> 223) +
 * 2 space character separators +
 * 1 null terminator =
 * 457 characters. */
#define NVMF_PSK_IDENTITY_LEN (SPDK_NVMF_NQN_MAX_LEN + SPDK_NVMF_NQN_MAX_LEN + 11)
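/*
 * Illustrative example (the NQNs are hypothetical): for TLS 1.3 ("0"), a
 * retained PSK ("R") and the SHA-256 based cipher suite ("01"), the identity
 * produced by nvme_tcp_generate_psk_identity() below looks like:
 *
 *	NVMe0R01 nqn.2014-08.org.nvmexpress:uuid:host1 nqn.2016-06.io.spdk:cnode1
 */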

/* The maximum size of hkdf_info is defined by RFC 8446, 514B (2 + 256 + 256). */
#define NVME_TCP_HKDF_INFO_MAX_LEN 514
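/*
 * For reference, the hkdf_info buffers built by the derivation helpers below
 * follow the HkdfLabel layout of RFC 8446:
 *
 *	uint16 length          (2 bytes, big-endian; here the PSK size)
 *	uint8  label length    (1 byte) + label bytes   (up to 255)
 *	uint8  context length  (1 byte) + context bytes (up to 255)
 */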

#define PSK_ID_PREFIX "NVMe0R"

enum nvme_tcp_cipher_suite {
	NVME_TCP_CIPHER_AES_128_GCM_SHA256,
	NVME_TCP_CIPHER_AES_256_GCM_SHA384,
};

typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg);

struct nvme_tcp_pdu {
	union {
		/* To hold error (terminate request) PDU data */
		uint8_t					raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
		struct spdk_nvme_tcp_common_pdu_hdr	common;
		struct spdk_nvme_tcp_ic_req		ic_req;
		struct spdk_nvme_tcp_term_req_hdr	term_req;
		struct spdk_nvme_tcp_cmd		capsule_cmd;
		struct spdk_nvme_tcp_h2c_data_hdr	h2c_data;
		struct spdk_nvme_tcp_ic_resp		ic_resp;
		struct spdk_nvme_tcp_rsp		capsule_resp;
		struct spdk_nvme_tcp_c2h_data_hdr	c2h_data;
		struct spdk_nvme_tcp_r2t_hdr		r2t;

	} hdr;

	bool						has_hdgst;
	bool						ddgst_enable;
	uint32_t					data_digest_crc32;
	uint8_t						data_digest[SPDK_NVME_TCP_DIGEST_LEN];

	uint8_t						ch_valid_bytes;
	uint8_t						psh_valid_bytes;
	uint8_t						psh_len;

	nvme_tcp_qpair_xfer_complete_cb			cb_fn;
	void						*cb_arg;
	/* The sock request ends with a zero-length iovec. Place the actual iovecs
	 * immediately after it. A static assert below verifies that the compiler
	 * did not insert any unwanted padding between the two fields. */
	struct spdk_sock_request			sock_req;
	struct iovec					iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];

	struct iovec					data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t					data_iovcnt;
	uint32_t					data_len;

	uint32_t					rw_offset;
	TAILQ_ENTRY(nvme_tcp_pdu)			tailq;
	uint32_t					remaining;
	uint32_t					padding_len;

	struct spdk_dif_ctx				*dif_ctx;

	void						*req; /* data tied to a tcp request */
	void						*qpair;
	SLIST_ENTRY(nvme_tcp_pdu)			slist;
};
SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
			    sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
		   "Compiler inserted padding between iov and sock_req");

enum nvme_tcp_pdu_recv_state {
	/* Ready to wait for PDU */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,

	/* Active tqpair waiting for any PDU common header */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,

	/* Active tqpair waiting for any PDU specific header */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,

	/* Active tqpair waiting for a tcp request, only used on the target side */
	NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,

	/* Active tqpair waiting for payload */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,

	/* Active tqpair waiting for all outstanding PDUs to complete */
	NVME_TCP_PDU_RECV_STATE_QUIESCING,

	/* Active tqpair does not wait for payload */
	NVME_TCP_PDU_RECV_STATE_ERROR,
};

enum nvme_tcp_error_codes {
	NVME_TCP_PDU_IN_PROGRESS        = 0,
	NVME_TCP_CONNECTION_FATAL       = -1,
	NVME_TCP_PDU_FATAL              = -2,
};

enum nvme_tcp_qpair_state {
	NVME_TCP_QPAIR_STATE_INVALID = 0,
	NVME_TCP_QPAIR_STATE_INITIALIZING = 1,
	NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND = 2,
	NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL = 3,
	NVME_TCP_QPAIR_STATE_RUNNING = 4,
	NVME_TCP_QPAIR_STATE_EXITING = 5,
	NVME_TCP_QPAIR_STATE_EXITED = 6,
};

static const bool g_nvme_tcp_hdgst[] = {
	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]         = false,
	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]        = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]    = true,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]   = true,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_R2T]            = true
};

static const bool g_nvme_tcp_ddgst[] = {
	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]         = false,
	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]        = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]    = true,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_R2T]            = false
};

static uint32_t
nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
{
	uint32_t crc32c;
	uint32_t hlen = pdu->hdr.common.hlen;

	crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0);
	crc32c = crc32c ^ SPDK_CRC32C_XOR;
	return crc32c;
}
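
/*
 * Usage sketch (illustrative only): a sender serializes the digest directly
 * after the PDU header with MAKE_DIGEST_WORD, while a receiver recomputes it
 * and compares against the received bytes with MATCH_DIGEST_WORD:
 *
 *	uint32_t crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
 *
 *	MAKE_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
 *
 *	if (!MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c)) {
 *		// Header digest error: the spec calls for a terminate request.
 *	}
 */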

static uint32_t
nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
{
	uint32_t crc32c = SPDK_CRC32C_XOR;
	uint32_t mod;

	assert(pdu->data_len != 0);

	if (spdk_likely(!pdu->dif_ctx)) {
		crc32c = spdk_crc32c_iov_update(pdu->data_iov, pdu->data_iovcnt, crc32c);
	} else {
		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
					      0, pdu->data_len, &crc32c, pdu->dif_ctx);
	}

	mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
	if (mod != 0) {
		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod;
		uint8_t pad[3] = {0, 0, 0};

		assert(pad_length > 0);
		assert(pad_length <= sizeof(pad));
		crc32c = spdk_crc32c_update(pad, pad_length, crc32c);
	}
	return crc32c;
}

static inline void
_nvme_tcp_sgl_get_buf(struct spdk_iov_sgl *s, void **_buf, uint32_t *_buf_len)
{
	if (_buf != NULL) {
		*_buf = (uint8_t *)s->iov->iov_base + s->iov_offset;
	}
	if (_buf_len != NULL) {
		*_buf_len = s->iov->iov_len - s->iov_offset;
	}
}

static inline bool
_nvme_tcp_sgl_append_multi(struct spdk_iov_sgl *s, struct iovec *iov, int iovcnt)
{
	int i;

	for (i = 0; i < iovcnt; i++) {
		if (!spdk_iov_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) {
			return false;
		}
	}

	return true;
}

static inline uint32_t
_get_iov_array_size(struct iovec *iov, int iovcnt)
{
	int i;
	uint32_t size = 0;

	for (i = 0; i < iovcnt; i++) {
		size += iov[i].iov_len;
	}

	return size;
}

static inline bool
_nvme_tcp_sgl_append_multi_with_md(struct spdk_iov_sgl *s, struct iovec *iov, int iovcnt,
				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
{
	int rc;
	uint32_t mapped_len = 0;

	if (s->iov_offset >= data_len) {
		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
	} else {
		rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
						     s->iov_offset, data_len - s->iov_offset,
						     &mapped_len, dif_ctx);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
			return false;
		}

		s->total_size += mapped_len;
		s->iov_offset = 0;
		assert(s->iovcnt >= rc);
		s->iovcnt -= rc;
		s->iov += rc;

		if (s->iovcnt == 0) {
			return false;
		}
	}

	return true;
}

static int
nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
		    bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
{
	uint32_t hlen;
	uint32_t plen __attribute__((unused));
	struct spdk_iov_sgl sgl;

	if (iovcnt == 0) {
		return 0;
	}

	spdk_iov_sgl_init(&sgl, iov, iovcnt, 0);
	hlen = pdu->hdr.common.hlen;

	/* Header Digest */
	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
		hlen += SPDK_NVME_TCP_DIGEST_LEN;
	}

	plen = hlen;
	if (!pdu->data_len) {
		/* PDU header + possible header digest */
		spdk_iov_sgl_append(&sgl, (uint8_t *)&pdu->hdr.raw, hlen);
		goto end;
	}

	/* Padding */
	if (pdu->padding_len > 0) {
		hlen += pdu->padding_len;
		plen = hlen;
	}

	if (!spdk_iov_sgl_append(&sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
		goto end;
	}

	/* Data Segment */
	plen += pdu->data_len;
	if (spdk_likely(!pdu->dif_ctx)) {
		if (!_nvme_tcp_sgl_append_multi(&sgl, pdu->data_iov, pdu->data_iovcnt)) {
			goto end;
		}
	} else {
		if (!_nvme_tcp_sgl_append_multi_with_md(&sgl, pdu->data_iov, pdu->data_iovcnt,
							pdu->data_len, pdu->dif_ctx)) {
			goto end;
		}
	}

	/* Data Digest */
	if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
		plen += SPDK_NVME_TCP_DIGEST_LEN;
		spdk_iov_sgl_append(&sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
	}

	assert(plen == pdu->hdr.common.plen);

end:
	if (_mapped_length != NULL) {
		*_mapped_length = sgl.total_size;
	}

	return iovcnt - sgl.iovcnt;
}
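
/*
 * Usage sketch (illustrative; the tqpair fields shown are assumptions, not
 * part of this header): build the full iovec set for a PDU and submit it to
 * the sock layer in a single vectored write:
 *
 *	struct iovec iovs[NVME_TCP_MAX_SGL_DESCRIPTORS + 2];
 *	uint32_t mapped_length;
 *	int iovcnt;
 *	ssize_t rc;
 *
 *	iovcnt = nvme_tcp_build_iovs(iovs, SPDK_COUNTOF(iovs), pdu,
 *				     tqpair->host_hdgst_enable,
 *				     tqpair->host_ddgst_enable, &mapped_length);
 *	rc = spdk_sock_writev(tqpair->sock, iovs, iovcnt);
 */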

static int
nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
			    bool ddgst_enable, uint32_t *_mapped_length)
{
	struct spdk_iov_sgl sgl;

	if (iovcnt == 0) {
		return 0;
	}

	spdk_iov_sgl_init(&sgl, iov, iovcnt, pdu->rw_offset);

	if (spdk_likely(!pdu->dif_ctx)) {
		if (!_nvme_tcp_sgl_append_multi(&sgl, pdu->data_iov, pdu->data_iovcnt)) {
			goto end;
		}
	} else {
		if (!_nvme_tcp_sgl_append_multi_with_md(&sgl, pdu->data_iov, pdu->data_iovcnt,
							pdu->data_len, pdu->dif_ctx)) {
			goto end;
		}
	}

	/* Data Digest */
	if (ddgst_enable) {
		spdk_iov_sgl_append(&sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
	}

end:
	if (_mapped_length != NULL) {
		*_mapped_length = sgl.total_size;
	}
	return iovcnt - sgl.iovcnt;
}

static int
nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
		   void *buf)
{
	int ret;

	ret = spdk_sock_recv(sock, buf, bytes);

	if (ret > 0) {
		return ret;
	}

	if (ret < 0) {
		if (errno == EAGAIN || errno == EWOULDBLOCK) {
			return 0;
		}

		/* Do not log an error on connection reset */
		if (errno != ECONNRESET) {
			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
				    errno, spdk_strerror(errno));
		}
	}

	/* connection closed */
	return NVME_TCP_CONNECTION_FATAL;
}

static int
nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
{
	int ret;

	assert(sock != NULL);
	if (iov == NULL || iovcnt == 0) {
		return 0;
	}

	if (iovcnt == 1) {
		return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
	}

	ret = spdk_sock_readv(sock, iov, iovcnt);

	if (ret > 0) {
		return ret;
	}

	if (ret < 0) {
		if (errno == EAGAIN || errno == EWOULDBLOCK) {
			return 0;
		}

		/* Do not log an error on connection reset */
		if (errno != ECONNRESET) {
			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
				    errno, spdk_strerror(errno));
		}
	}

	/* connection closed */
	return NVME_TCP_CONNECTION_FATAL;
}

static int
nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
{
	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
	int iovcnt;

	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
					     pdu->ddgst_enable, NULL);
	assert(iovcnt >= 0);

	return nvme_tcp_readv_data(sock, iov, iovcnt);
}

static void
_nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
{
	pdu->data_iov[0].iov_base = data;
	pdu->data_iov[0].iov_len = data_len;
	pdu->data_iovcnt = 1;
}

static void
nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
{
	_nvme_tcp_pdu_set_data(pdu, data, data_len);
	pdu->data_len = data_len;
}

static void
nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
			  struct iovec *iov, int iovcnt,
			  uint32_t data_offset, uint32_t data_len)
{
	uint32_t buf_offset, buf_len, remain_len, len;
	uint8_t *buf;
	struct spdk_iov_sgl pdu_sgl, buf_sgl;

	pdu->data_len = data_len;

	if (spdk_likely(!pdu->dif_ctx)) {
		buf_offset = data_offset;
		buf_len = data_len;
	} else {
		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
		spdk_dif_get_range_with_md(data_offset, data_len,
					   &buf_offset, &buf_len, pdu->dif_ctx);
	}

	if (iovcnt == 1) {
		_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
	} else {
		spdk_iov_sgl_init(&pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
		spdk_iov_sgl_init(&buf_sgl, iov, iovcnt, 0);

		spdk_iov_sgl_advance(&buf_sgl, buf_offset);
		remain_len = buf_len;

		while (remain_len > 0) {
			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
			len = spdk_min(len, remain_len);

			spdk_iov_sgl_advance(&buf_sgl, len);
			remain_len -= len;

			if (!spdk_iov_sgl_append(&pdu_sgl, buf, len)) {
				break;
			}
		}

		assert(remain_len == 0);
		assert(pdu_sgl.total_size == buf_len);

		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl.iovcnt;
	}
}
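
/*
 * Illustrative example (the values and req fields are hypothetical): for a
 * transfer split across multiple PDUs, map bytes [8192, 16384) of a request's
 * SGL onto this PDU:
 *
 *	nvme_tcp_pdu_set_data_buf(pdu, req->iov, req->iovcnt, 8192, 8192);
 *
 * pdu->data_iov then covers exactly that byte range; when a DIF context is
 * set, the data offset and length are first translated to the corresponding
 * metadata-interleaved extents.
 */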

static void
nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
{
	uint8_t psh_len, pdo, padding_len;

	psh_len = pdu->hdr.common.hlen;

	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
		pdu->has_hdgst = true;
		psh_len += SPDK_NVME_TCP_DIGEST_LEN;
	}
	if (pdu->hdr.common.plen > psh_len) {
		switch (pdu->hdr.common.pdu_type) {
		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
			pdo = pdu->hdr.common.pdo;
			padding_len = pdo - psh_len;
			if (padding_len > 0) {
				psh_len = pdo;
			}
			break;
		default:
			/* There is no padding for other PDU types */
			break;
		}
	}

	psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
	pdu->psh_len = psh_len;
}

static inline int
nvme_tcp_generate_psk_identity(char *out_id, size_t out_id_len, const char *hostnqn,
			       const char *subnqn, enum nvme_tcp_cipher_suite tls_cipher_suite)
{
	int rc;

	assert(out_id != NULL);

	if (out_id_len < strlen(PSK_ID_PREFIX) + strlen(hostnqn) + strlen(subnqn) + 5) {
		SPDK_ERRLOG("Out buffer too small!\n");
		return -1;
	}

	if (tls_cipher_suite == NVME_TCP_CIPHER_AES_128_GCM_SHA256) {
		rc = snprintf(out_id, out_id_len, "%s%s %s %s", PSK_ID_PREFIX, "01",
			      hostnqn, subnqn);
	} else if (tls_cipher_suite == NVME_TCP_CIPHER_AES_256_GCM_SHA384) {
		rc = snprintf(out_id, out_id_len, "%s%s %s %s", PSK_ID_PREFIX, "02",
			      hostnqn, subnqn);
	} else {
		SPDK_ERRLOG("Unknown cipher suite requested!\n");
		return -EOPNOTSUPP;
	}

	if (rc < 0) {
		SPDK_ERRLOG("Could not generate PSK identity\n");
		return -1;
	}

	return 0;
}

enum nvme_tcp_hash_algorithm {
	NVME_TCP_HASH_ALGORITHM_NONE,
	NVME_TCP_HASH_ALGORITHM_SHA256,
	NVME_TCP_HASH_ALGORITHM_SHA384,
};

static inline int
nvme_tcp_derive_retained_psk(const uint8_t *psk_in, uint64_t psk_in_size, const char *hostnqn,
			     uint8_t *psk_out, uint64_t psk_out_len, enum nvme_tcp_hash_algorithm psk_retained_hash)
{
	EVP_PKEY_CTX *ctx;
	uint64_t digest_len;
	uint8_t hkdf_info[NVME_TCP_HKDF_INFO_MAX_LEN] = {};
	const char *label = "tls13 HostNQN";
	size_t pos, labellen, nqnlen;
	const EVP_MD *hash;
	int rc, hkdf_info_size;

	labellen = strlen(label);
	nqnlen = strlen(hostnqn);
	assert(nqnlen <= SPDK_NVMF_NQN_MAX_LEN);

	*(uint16_t *)&hkdf_info[0] = htons(psk_in_size);
	pos = sizeof(uint16_t);
	hkdf_info[pos] = (uint8_t)labellen;
	pos += sizeof(uint8_t);
	memcpy(&hkdf_info[pos], label, labellen);
	pos += labellen;
	hkdf_info[pos] = (uint8_t)nqnlen;
	pos += sizeof(uint8_t);
	memcpy(&hkdf_info[pos], hostnqn, nqnlen);
	pos += nqnlen;
	hkdf_info_size = pos;

	switch (psk_retained_hash) {
	case NVME_TCP_HASH_ALGORITHM_SHA256:
		digest_len = SHA256_DIGEST_LENGTH;
		hash = EVP_sha256();
		break;
	case NVME_TCP_HASH_ALGORITHM_SHA384:
		digest_len = SHA384_DIGEST_LENGTH;
		hash = EVP_sha384();
		break;
	default:
		SPDK_ERRLOG("Unknown PSK hash requested!\n");
		return -EOPNOTSUPP;
	}

	if (digest_len > psk_out_len) {
		SPDK_ERRLOG("Insufficient buffer size for out key!\n");
		return -EINVAL;
	}

	ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
	if (!ctx) {
		SPDK_ERRLOG("Unable to initialize EVP_PKEY_CTX!\n");
		return -ENOMEM;
	}

	/* EVP_PKEY_* functions return 1 on success and 0 or a negative value on failure. */
	if (EVP_PKEY_derive_init(ctx) != 1) {
		SPDK_ERRLOG("Unable to initialize key derivation ctx for HKDF!\n");
		rc = -ENOMEM;
		goto end;
	}
	if (EVP_PKEY_CTX_set_hkdf_md(ctx, hash) != 1) {
		SPDK_ERRLOG("Unable to set hash for HKDF!\n");
		rc = -EOPNOTSUPP;
		goto end;
	}
	if (EVP_PKEY_CTX_set1_hkdf_key(ctx, psk_in, psk_in_size) != 1) {
		SPDK_ERRLOG("Unable to set PSK key for HKDF!\n");
		rc = -ENOBUFS;
		goto end;
	}

	if (EVP_PKEY_CTX_add1_hkdf_info(ctx, hkdf_info, hkdf_info_size) != 1) {
		SPDK_ERRLOG("Unable to set info label for HKDF!\n");
		rc = -ENOBUFS;
		goto end;
	}
	if (EVP_PKEY_CTX_set1_hkdf_salt(ctx, NULL, 0) != 1) {
		SPDK_ERRLOG("Unable to set salt for HKDF!\n");
		rc = -EINVAL;
		goto end;
	}
	if (EVP_PKEY_derive(ctx, psk_out, &digest_len) != 1) {
		SPDK_ERRLOG("Unable to derive the PSK key!\n");
		rc = -EINVAL;
		goto end;
	}

	rc = digest_len;

end:
	EVP_PKEY_CTX_free(ctx);
	return rc;
}

static inline int
nvme_tcp_derive_tls_psk(const uint8_t *psk_in, uint64_t psk_in_size, const char *psk_identity,
			uint8_t *psk_out, uint64_t psk_out_size, enum nvme_tcp_cipher_suite tls_cipher_suite)
{
	EVP_PKEY_CTX *ctx;
	uint64_t digest_len = 0;
	char hkdf_info[NVME_TCP_HKDF_INFO_MAX_LEN] = {};
	const char *label = "tls13 nvme-tls-psk";
	size_t pos, labellen, idlen;
	const EVP_MD *hash;
	int rc, hkdf_info_size;

	if (tls_cipher_suite == NVME_TCP_CIPHER_AES_128_GCM_SHA256) {
		digest_len = SHA256_DIGEST_LENGTH;
		hash = EVP_sha256();
	} else if (tls_cipher_suite == NVME_TCP_CIPHER_AES_256_GCM_SHA384) {
		digest_len = SHA384_DIGEST_LENGTH;
		hash = EVP_sha384();
	} else {
		SPDK_ERRLOG("Unknown cipher suite requested!\n");
		return -EOPNOTSUPP;
	}

	labellen = strlen(label);
	idlen = strlen(psk_identity);
	if (idlen > UINT8_MAX) {
		SPDK_ERRLOG("Invalid PSK ID: too long\n");
		return -1;
	}

	*(uint16_t *)&hkdf_info[0] = htons(psk_in_size);
	pos = sizeof(uint16_t);
	hkdf_info[pos] = (uint8_t)labellen;
	pos += sizeof(uint8_t);
	memcpy(&hkdf_info[pos], label, labellen);
	pos += labellen;
	hkdf_info[pos] = (uint8_t)idlen;
	pos += sizeof(uint8_t);
	memcpy(&hkdf_info[pos], psk_identity, idlen);
	pos += idlen;
	hkdf_info_size = pos;

	if (digest_len > psk_out_size) {
		SPDK_ERRLOG("Insufficient buffer size for out key!\n");
		return -1;
	}

	ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
	if (!ctx) {
		SPDK_ERRLOG("Unable to initialize EVP_PKEY_CTX!\n");
		return -1;
	}

	if (EVP_PKEY_derive_init(ctx) != 1) {
		SPDK_ERRLOG("Unable to initialize key derivation ctx for HKDF!\n");
		rc = -ENOMEM;
		goto end;
	}
	if (EVP_PKEY_CTX_set_hkdf_md(ctx, hash) != 1) {
		SPDK_ERRLOG("Unable to set hash method for HKDF!\n");
		rc = -EOPNOTSUPP;
		goto end;
	}
	if (EVP_PKEY_CTX_set1_hkdf_key(ctx, psk_in, psk_in_size) != 1) {
		SPDK_ERRLOG("Unable to set PSK key for HKDF!\n");
		rc = -ENOBUFS;
		goto end;
	}
	if (EVP_PKEY_CTX_add1_hkdf_info(ctx, hkdf_info, hkdf_info_size) != 1) {
		SPDK_ERRLOG("Unable to set info label for HKDF!\n");
		rc = -ENOBUFS;
		goto end;
	}
	if (EVP_PKEY_CTX_set1_hkdf_salt(ctx, NULL, 0) != 1) {
		SPDK_ERRLOG("Unable to set salt for HKDF!\n");
		rc = -EINVAL;
		goto end;
	}
	if (EVP_PKEY_derive(ctx, psk_out, &digest_len) != 1) {
		SPDK_ERRLOG("Unable to derive the PSK key!\n");
		rc = -EINVAL;
		goto end;
	}

	rc = digest_len;

end:
	EVP_PKEY_CTX_free(ctx);
	return rc;
}

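/*
 * Parses a PSK given in the NVMe TLS PSK interchange format:
 *
 *	NVMeTLSkey-1:<hh>:<base64 of configured PSK + 4-byte CRC-32>:
 *
 * where <hh> is the hash specifier ("00" none, "01" SHA-256, "02" SHA-384).
 * An example key in this format (illustrative sample, not a real secret):
 *
 *	NVMeTLSkey-1:01:VRLbtnN9AQb2WXW3c9+wEf/DRLz0QuLdbYvEhwtdWwNf9LrZ:
 */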
static inline int
nvme_tcp_parse_interchange_psk(const char *psk_in, uint8_t *psk_out, size_t psk_out_size,
			       uint64_t *psk_out_decoded_size, uint8_t *hash)
{
	const char *delim = ":";
	char psk_cpy[SPDK_TLS_PSK_MAX_LEN] = {};
	uint8_t psk_base64_decoded[SPDK_TLS_PSK_MAX_LEN] = {};
	uint64_t psk_configured_size = 0;
	uint32_t crc32_calc, crc32;
	char *psk_base64;
	uint64_t psk_base64_decoded_size = 0;
	int rc;

	/* Verify PSK format. */
	if (sscanf(psk_in, "NVMeTLSkey-1:%02hhx:", hash) != 1 || psk_in[strlen(psk_in) - 1] != delim[0]) {
		SPDK_ERRLOG("Invalid format of PSK interchange!\n");
		return -EINVAL;
	}

	if (strlen(psk_in) >= SPDK_TLS_PSK_MAX_LEN) {
		SPDK_ERRLOG("PSK interchange exceeds maximum %d characters!\n", SPDK_TLS_PSK_MAX_LEN);
		return -EINVAL;
	}
	if (*hash != NVME_TCP_HASH_ALGORITHM_NONE && *hash != NVME_TCP_HASH_ALGORITHM_SHA256 &&
	    *hash != NVME_TCP_HASH_ALGORITHM_SHA384) {
		SPDK_ERRLOG("Invalid PSK hash requested!\n");
		return -EINVAL;
	}

	/* Skip the format prefix and hash specifier to reach the base64-encoded PSK. */
	memcpy(psk_cpy, psk_in, strlen(psk_in));
	strtok(psk_cpy, delim);
	strtok(NULL, delim);

	psk_base64 = strtok(NULL, delim);
	if (psk_base64 == NULL) {
		SPDK_ERRLOG("Could not get base64 string from PSK interchange!\n");
		return -EINVAL;
	}

	rc = spdk_base64_decode(psk_base64_decoded, &psk_base64_decoded_size, psk_base64);
	if (rc) {
		SPDK_ERRLOG("Could not decode base64 PSK!\n");
		return -EINVAL;
	}

	switch (*hash) {
	case NVME_TCP_HASH_ALGORITHM_SHA256:
		psk_configured_size = SHA256_DIGEST_LENGTH;
		break;
	case NVME_TCP_HASH_ALGORITHM_SHA384:
		psk_configured_size = SHA384_DIGEST_LENGTH;
		break;
	case NVME_TCP_HASH_ALGORITHM_NONE:
		if (psk_base64_decoded_size == SHA256_DIGEST_LENGTH + SPDK_CRC32_SIZE_BYTES) {
			psk_configured_size = SHA256_DIGEST_LENGTH;
		} else if (psk_base64_decoded_size == SHA384_DIGEST_LENGTH + SPDK_CRC32_SIZE_BYTES) {
			psk_configured_size = SHA384_DIGEST_LENGTH;
		}
		break;
	default:
		SPDK_ERRLOG("Invalid key: unsupported key hash\n");
		assert(0);
		return -EINVAL;
	}
	if (psk_base64_decoded_size != psk_configured_size + SPDK_CRC32_SIZE_BYTES) {
		SPDK_ERRLOG("Invalid key: unsupported key length\n");
		return -EINVAL;
	}

	crc32 = from_le32(&psk_base64_decoded[psk_configured_size]);

	crc32_calc = spdk_crc32_ieee_update(psk_base64_decoded, psk_configured_size, ~0);
	crc32_calc = ~crc32_calc;

	if (crc32 != crc32_calc) {
		SPDK_ERRLOG("CRC-32 checksums do not match!\n");
		return -EINVAL;
	}

	if (psk_configured_size > psk_out_size) {
		SPDK_ERRLOG("Insufficient buffer size: %lu for configured PSK of size: %lu!\n",
			    psk_out_size, psk_configured_size);
		return -ENOBUFS;
	}
	memcpy(psk_out, psk_base64_decoded, psk_configured_size);
	*psk_out_decoded_size = psk_configured_size;

	return rc;
}
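
/*
 * End-to-end sketch (illustrative; key_str, hostnqn and subnqn are assumed
 * inputs, error handling is omitted, and a SHA-256 based key is assumed):
 * deriving the final TLS PSK from a configured interchange key takes three
 * steps:
 *
 *	uint8_t psk[SPDK_TLS_PSK_MAX_LEN], retained[SPDK_TLS_PSK_MAX_LEN];
 *	uint8_t tls_psk[SPDK_TLS_PSK_MAX_LEN];
 *	char identity[NVMF_PSK_IDENTITY_LEN];
 *	uint64_t psk_size;
 *	uint8_t hash;
 *	int retained_size;
 *
 *	nvme_tcp_parse_interchange_psk(key_str, psk, sizeof(psk), &psk_size, &hash);
 *	retained_size = nvme_tcp_derive_retained_psk(psk, psk_size, hostnqn, retained,
 *						     sizeof(retained), hash);
 *	nvme_tcp_generate_psk_identity(identity, sizeof(identity), hostnqn, subnqn,
 *				       NVME_TCP_CIPHER_AES_128_GCM_SHA256);
 *	nvme_tcp_derive_tls_psk(retained, retained_size, identity, tls_psk,
 *				sizeof(tls_psk), NVME_TCP_CIPHER_AES_128_GCM_SHA256);
 */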

#endif /* SPDK_INTERNAL_NVME_TCP_H */