xref: /spdk/include/spdk_internal/nvme_tcp.h (revision 2e1d23f4b70ea8940db7624b3bb974a4a8658ec7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2018 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #ifndef SPDK_INTERNAL_NVME_TCP_H
8 #define SPDK_INTERNAL_NVME_TCP_H
9 
10 #include "spdk/likely.h"
11 #include "spdk/sock.h"
12 #include "spdk/dif.h"
13 #include "spdk/hexlify.h"
14 #include "spdk/nvmf_spec.h"
15 #include "spdk/util.h"
16 #include "spdk/base64.h"
17 
18 #include "sgl.h"
19 
20 #include "openssl/evp.h"
21 #include "openssl/kdf.h"
22 #include "openssl/sha.h"
23 
/* Mask XORed into a CRC32C value to finalize the header digest; it is also the
 * initial seed of the data digest calculation in this header. */
#define SPDK_CRC32C_XOR				0xffffffffUL
/* Size in bytes of a header digest or data digest (one 32-bit CRC32C word). */
#define SPDK_NVME_TCP_DIGEST_LEN		4
/* The data digest is computed over the data zero-padded to a multiple of this many bytes. */
#define SPDK_NVME_TCP_DIGEST_ALIGNMENT		4
/* NOTE(review): not referenced in this header; presumably a timeout in seconds - confirm with users. */
#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT	30
/* NOTE(review): not referenced in this header; presumably a multiplier for the receive buffer size - confirm. */
#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR	8
/* NOTE(review): not referenced in this header; presumably an upper bound, in bytes, for in-capsule data - confirm. */
#define SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE	8192u
/*
 * Maximum number of SGL elements. This sizes nvme_tcp_pdu.data_iov and
 * (doubled) nvme_tcp_pdu.iov.
 */
#define NVME_TCP_MAX_SGL_DESCRIPTORS	(16)
34 
/* Store the 32-bit value CRC32C into the 4 bytes at BUF, least significant byte
 * first. BUF and CRC32C are each evaluated four times, so they must not have
 * side effects. */
#define MAKE_DIGEST_WORD(BUF, CRC32C) \
        (   ((*((uint8_t *)(BUF)+0)) = (uint8_t)((uint32_t)(CRC32C) >> 0)), \
            ((*((uint8_t *)(BUF)+1)) = (uint8_t)((uint32_t)(CRC32C) >> 8)), \
            ((*((uint8_t *)(BUF)+2)) = (uint8_t)((uint32_t)(CRC32C) >> 16)), \
            ((*((uint8_t *)(BUF)+3)) = (uint8_t)((uint32_t)(CRC32C) >> 24)))
40 
/* Evaluate to non-zero when the 4 bytes at BUF, read least significant byte
 * first, equal CRC32C. BUF is evaluated four times, so it must not have side
 * effects. */
#define MATCH_DIGEST_WORD(BUF, CRC32C) \
        (    ((((uint32_t) *((uint8_t *)(BUF)+0)) << 0)         \
            | (((uint32_t) *((uint8_t *)(BUF)+1)) << 8)         \
            | (((uint32_t) *((uint8_t *)(BUF)+2)) << 16)        \
            | (((uint32_t) *((uint8_t *)(BUF)+3)) << 24))       \
            == (CRC32C))
47 
/* Read a 32-bit value from the 4 bytes at B, least significant byte first.
 * B is evaluated four times, so it must not have side effects. */
#define DGET32(B)                                                               \
        (((  (uint32_t) *((uint8_t *)(B)+0)) << 0)                              \
         | (((uint32_t) *((uint8_t *)(B)+1)) << 8)                              \
         | (((uint32_t) *((uint8_t *)(B)+2)) << 16)                             \
         | (((uint32_t) *((uint8_t *)(B)+3)) << 24))
53 
/* Write the 32-bit value D into the 4 bytes at B, least significant byte first.
 * B and D are each evaluated four times, so they must not have side effects. */
#define DSET32(B,D)                                                             \
        (((*((uint8_t *)(B)+0)) = (uint8_t)((uint32_t)(D) >> 0)),               \
         ((*((uint8_t *)(B)+1)) = (uint8_t)((uint32_t)(D) >> 8)),               \
         ((*((uint8_t *)(B)+2)) = (uint8_t)((uint32_t)(D) >> 16)),              \
         ((*((uint8_t *)(B)+3)) = (uint8_t)((uint32_t)(D) >> 24)))
59 
/* The PSK identity consists of the following components:
 * 4-character format specifier "NVMe" +
 * 1-character TLS protocol version indicator +
 * 1-character PSK type indicator, specifying the PSK in use +
 * 2-character hash specifier +
 * NQN of the host (SPDK_NVMF_NQN_MAX_LEN -> 223) +
 * NQN of the subsystem (SPDK_NVMF_NQN_MAX_LEN -> 223) +
 * 2 space character separators +
 * 1 null terminator =
 * 457 characters. */
#define NVMF_PSK_IDENTITY_LEN (SPDK_NVMF_NQN_MAX_LEN + SPDK_NVMF_NQN_MAX_LEN + 11)

/* The maximum size of hkdf_info is defined by RFC 8446, 514B (2 + 256 + 256). */
#define NVME_TCP_HKDF_INFO_MAX_LEN 514

/* Fixed start of every PSK identity built by nvme_tcp_generate_psk_identity():
 * the format specifier followed by the version and PSK type indicators. */
#define PSK_ID_PREFIX "NVMe0R"
76 
/* TLS cipher suites understood by the PSK helpers in this header. The suite
 * selects the hash specifier written into the PSK identity ("01" or "02") and
 * the hash (SHA-256 or SHA-384) used when deriving the TLS PSK. */
enum nvme_tcp_cipher_suite {
	NVME_TCP_CIPHER_AES_128_GCM_SHA256,
	NVME_TCP_CIPHER_AES_256_GCM_SHA384,
};
81 
/* Callback type stored in nvme_tcp_pdu.cb_fn; it receives the opaque argument
 * kept alongside it in nvme_tcp_pdu.cb_arg. */
typedef void (*nvme_tcp_qpair_xfer_complete_cb)(void *cb_arg);
83 
/*
 * In-memory representation of one NVMe/TCP PDU: the wire header, digest
 * bookkeeping, and the iovecs describing the data segment.
 */
struct nvme_tcp_pdu {
	/* Wire header, viewable as raw bytes or as any of the typed PDU headers. */
	union {
		/* to hold error pdu data */
		uint8_t					raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
		struct spdk_nvme_tcp_common_pdu_hdr	common;
		struct spdk_nvme_tcp_ic_req		ic_req;
		struct spdk_nvme_tcp_term_req_hdr	term_req;
		struct spdk_nvme_tcp_cmd		capsule_cmd;
		struct spdk_nvme_tcp_h2c_data_hdr	h2c_data;
		struct spdk_nvme_tcp_ic_resp		ic_resp;
		struct spdk_nvme_tcp_rsp		capsule_resp;
		struct spdk_nvme_tcp_c2h_data_hdr	c2h_data;
		struct spdk_nvme_tcp_r2t_hdr		r2t;

	} hdr;

	/* Set to true by nvme_tcp_pdu_calc_psh_len() when this PDU carries a header digest. */
	bool						has_hdgst;
	/* Whether the data digest is appended to the payload iovecs when reading payload. */
	bool						ddgst_enable;
	/* NOTE(review): not used in this header; presumably the computed data digest CRC - confirm. */
	uint32_t					data_digest_crc32;
	/* Data digest bytes; mapped as the last iovec element when the data digest is enabled. */
	uint8_t						data_digest[SPDK_NVME_TCP_DIGEST_LEN];

	/* NOTE(review): not used in this header; presumably bytes of the common header received so far - confirm. */
	uint8_t						ch_valid_bytes;
	/* NOTE(review): not used in this header; presumably bytes of the PDU specific header received so far - confirm. */
	uint8_t						psh_valid_bytes;
	/* Length of the header past the common header, computed by nvme_tcp_pdu_calc_psh_len(). */
	uint8_t						psh_len;

	/* Callback and its argument; not invoked anywhere in this header. */
	nvme_tcp_qpair_xfer_complete_cb			cb_fn;
	void						*cb_arg;

	/* The sock request ends with a 0 length iovec. Place the actual iovec immediately
	 * after it. There is a static assert below to check if the compiler inserted
	 * any unwanted padding */
	struct spdk_sock_request			sock_req;
	struct iovec					iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];

	/* Buffers of the data segment, the number of valid entries, and the data length in bytes. */
	struct iovec					data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
	uint32_t					data_iovcnt;
	uint32_t					data_len;

	/* Starting offset used when mapping the payload in nvme_tcp_build_payload_iovs(). */
	uint32_t					rw_offset;
	TAILQ_ENTRY(nvme_tcp_pdu)			tailq;
	uint32_t					remaining;
	/* Bytes of padding that follow the header; added to the header length in nvme_tcp_build_iovs(). */
	uint32_t					padding_len;

	/* When non-NULL, data is mapped and digested through the DIF helpers instead of directly. */
	struct spdk_dif_ctx				*dif_ctx;

	void						*req; /* data tied to a tcp request */
	void						*qpair;
	SLIST_ENTRY(nvme_tcp_pdu)			slist;
};
/* iov must start exactly where sock_req ends so that it acts as the request's iovec array. */
SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu,
			    sock_req) + sizeof(struct spdk_sock_request) == offsetof(struct nvme_tcp_pdu, iov),
		   "Compiler inserted padding between iov and sock_req");
136 
/* States of the PDU receive state machine of a TCP qpair (tqpair). */
enum nvme_tcp_pdu_recv_state {
	/* Ready to wait for PDU */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY,

	/* Active tqpair waiting for any PDU common header */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH,

	/* Active tqpair waiting for any PDU specific header */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH,

	/* Active tqpair waiting for a tcp request, only use in target side */
	NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,

	/* Active tqpair waiting for a free buffer to store PDU */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF,

	/* Active tqpair waiting for payload */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,

	/* Active tqpair waiting for all outstanding PDUs to complete */
	NVME_TCP_PDU_RECV_STATE_QUIESCING,

	/* Active tqpair does not wait for payload */
	NVME_TCP_PDU_RECV_STATE_ERROR,
};
162 
/* Status codes shared by the receive helpers. NVME_TCP_CONNECTION_FATAL is what
 * nvme_tcp_read_data() and nvme_tcp_readv_data() return when the socket read
 * fails or the connection is closed. */
enum nvme_tcp_error_codes {
	NVME_TCP_PDU_IN_PROGRESS        = 0,
	NVME_TCP_CONNECTION_FATAL       = -1,
	NVME_TCP_PDU_FATAL              = -2,
};
168 
/* Indexed by PDU type: true when a PDU of that type carries a header digest,
 * provided the header digest is enabled by the caller. */
static const bool g_nvme_tcp_hdgst[] = {
	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]         = false,
	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]        = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]    = true,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]   = true,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_R2T]            = true
};
180 
/* Indexed by PDU type: true when a PDU of that type carries a data digest,
 * provided the data digest is enabled by the caller. */
static const bool g_nvme_tcp_ddgst[] = {
	[SPDK_NVME_TCP_PDU_TYPE_IC_REQ]         = false,
	[SPDK_NVME_TCP_PDU_TYPE_IC_RESP]        = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD]    = true,
	[SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP]   = false,
	[SPDK_NVME_TCP_PDU_TYPE_H2C_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_C2H_DATA]       = true,
	[SPDK_NVME_TCP_PDU_TYPE_R2T]            = false
};
192 
193 static uint32_t
194 nvme_tcp_pdu_calc_header_digest(struct nvme_tcp_pdu *pdu)
195 {
196 	uint32_t crc32c;
197 	uint32_t hlen = pdu->hdr.common.hlen;
198 
199 	crc32c = spdk_crc32c_update(&pdu->hdr.raw, hlen, ~0);
200 	crc32c = crc32c ^ SPDK_CRC32C_XOR;
201 	return crc32c;
202 }
203 
204 static uint32_t
205 nvme_tcp_pdu_calc_data_digest(struct nvme_tcp_pdu *pdu)
206 {
207 	uint32_t crc32c = SPDK_CRC32C_XOR;
208 	uint32_t mod;
209 
210 	assert(pdu->data_len != 0);
211 
212 	if (spdk_likely(!pdu->dif_ctx)) {
213 		crc32c = spdk_crc32c_iov_update(pdu->data_iov, pdu->data_iovcnt, crc32c);
214 	} else {
215 		spdk_dif_update_crc32c_stream(pdu->data_iov, pdu->data_iovcnt,
216 					      0, pdu->data_len, &crc32c, pdu->dif_ctx);
217 	}
218 
219 	mod = pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT;
220 	if (mod != 0) {
221 		uint32_t pad_length = SPDK_NVME_TCP_DIGEST_ALIGNMENT - mod;
222 		uint8_t pad[3] = {0, 0, 0};
223 
224 		assert(pad_length > 0);
225 		assert(pad_length <= sizeof(pad));
226 		crc32c = spdk_crc32c_update(pad, pad_length, crc32c);
227 	}
228 	return crc32c;
229 }
230 
231 static inline void
232 _nvme_tcp_sgl_get_buf(struct spdk_iov_sgl *s, void **_buf, uint32_t *_buf_len)
233 {
234 	if (_buf != NULL) {
235 		*_buf = (uint8_t *)s->iov->iov_base + s->iov_offset;
236 	}
237 	if (_buf_len != NULL) {
238 		*_buf_len = s->iov->iov_len - s->iov_offset;
239 	}
240 }
241 
/*
 * Append each of the iovcnt buffers in iov to the SGL, in order.
 *
 * Stops and returns false as soon as spdk_iov_sgl_append() returns false;
 * returns true when every buffer was appended.
 */
static inline bool
_nvme_tcp_sgl_append_multi(struct spdk_iov_sgl *s, struct iovec *iov, int iovcnt)
{
	int idx = 0;

	while (idx < iovcnt) {
		struct iovec *cur = &iov[idx++];

		if (!spdk_iov_sgl_append(s, cur->iov_base, cur->iov_len)) {
			return false;
		}
	}

	return true;
}
255 
/*
 * Sum the lengths of the first iovcnt entries of iov.
 *
 * The sum is accumulated in 32 bits. A non-positive iovcnt yields 0.
 */
static inline uint32_t
_get_iov_array_size(struct iovec *iov, int iovcnt)
{
	uint32_t total = 0;
	int idx = 0;

	while (idx < iovcnt) {
		total += iov[idx].iov_len;
		idx++;
	}

	return total;
}
268 
269 static inline bool
270 _nvme_tcp_sgl_append_multi_with_md(struct spdk_iov_sgl *s, struct iovec *iov, int iovcnt,
271 				   uint32_t data_len, const struct spdk_dif_ctx *dif_ctx)
272 {
273 	int rc;
274 	uint32_t mapped_len = 0;
275 
276 	if (s->iov_offset >= data_len) {
277 		s->iov_offset -= _get_iov_array_size(iov, iovcnt);
278 	} else {
279 		rc = spdk_dif_set_md_interleave_iovs(s->iov, s->iovcnt, iov, iovcnt,
280 						     s->iov_offset, data_len - s->iov_offset,
281 						     &mapped_len, dif_ctx);
282 		if (rc < 0) {
283 			SPDK_ERRLOG("Failed to setup iovs for DIF insert/strip.\n");
284 			return false;
285 		}
286 
287 		s->total_size += mapped_len;
288 		s->iov_offset = 0;
289 		assert(s->iovcnt >= rc);
290 		s->iovcnt -= rc;
291 		s->iov += rc;
292 
293 		if (s->iovcnt == 0) {
294 			return false;
295 		}
296 	}
297 
298 	return true;
299 }
300 
301 static int
302 nvme_tcp_build_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
303 		    bool hdgst_enable, bool ddgst_enable, uint32_t *_mapped_length)
304 {
305 	uint32_t hlen;
306 	uint32_t plen __attribute__((unused));
307 	struct spdk_iov_sgl sgl;
308 
309 	if (iovcnt == 0) {
310 		return 0;
311 	}
312 
313 	spdk_iov_sgl_init(&sgl, iov, iovcnt, 0);
314 	hlen = pdu->hdr.common.hlen;
315 
316 	/* Header Digest */
317 	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
318 		hlen += SPDK_NVME_TCP_DIGEST_LEN;
319 	}
320 
321 	plen = hlen;
322 	if (!pdu->data_len) {
323 		/* PDU header + possible header digest */
324 		spdk_iov_sgl_append(&sgl, (uint8_t *)&pdu->hdr.raw, hlen);
325 		goto end;
326 	}
327 
328 	/* Padding */
329 	if (pdu->padding_len > 0) {
330 		hlen += pdu->padding_len;
331 		plen = hlen;
332 	}
333 
334 	if (!spdk_iov_sgl_append(&sgl, (uint8_t *)&pdu->hdr.raw, hlen)) {
335 		goto end;
336 	}
337 
338 	/* Data Segment */
339 	plen += pdu->data_len;
340 	if (spdk_likely(!pdu->dif_ctx)) {
341 		if (!_nvme_tcp_sgl_append_multi(&sgl, pdu->data_iov, pdu->data_iovcnt)) {
342 			goto end;
343 		}
344 	} else {
345 		if (!_nvme_tcp_sgl_append_multi_with_md(&sgl, pdu->data_iov, pdu->data_iovcnt,
346 							pdu->data_len, pdu->dif_ctx)) {
347 			goto end;
348 		}
349 	}
350 
351 	/* Data Digest */
352 	if (g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] && ddgst_enable) {
353 		plen += SPDK_NVME_TCP_DIGEST_LEN;
354 		spdk_iov_sgl_append(&sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
355 	}
356 
357 	assert(plen == pdu->hdr.common.plen);
358 
359 end:
360 	if (_mapped_length != NULL) {
361 		*_mapped_length = sgl.total_size;
362 	}
363 
364 	return iovcnt - sgl.iovcnt;
365 }
366 
367 static int
368 nvme_tcp_build_payload_iovs(struct iovec *iov, int iovcnt, struct nvme_tcp_pdu *pdu,
369 			    bool ddgst_enable, uint32_t *_mapped_length)
370 {
371 	struct spdk_iov_sgl sgl;
372 
373 	if (iovcnt == 0) {
374 		return 0;
375 	}
376 
377 	spdk_iov_sgl_init(&sgl, iov, iovcnt, pdu->rw_offset);
378 
379 	if (spdk_likely(!pdu->dif_ctx)) {
380 		if (!_nvme_tcp_sgl_append_multi(&sgl, pdu->data_iov, pdu->data_iovcnt)) {
381 			goto end;
382 		}
383 	} else {
384 		if (!_nvme_tcp_sgl_append_multi_with_md(&sgl, pdu->data_iov, pdu->data_iovcnt,
385 							pdu->data_len, pdu->dif_ctx)) {
386 			goto end;
387 		}
388 	}
389 
390 	/* Data Digest */
391 	if (ddgst_enable) {
392 		spdk_iov_sgl_append(&sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
393 	}
394 
395 end:
396 	if (_mapped_length != NULL) {
397 		*_mapped_length = sgl.total_size;
398 	}
399 	return iovcnt - sgl.iovcnt;
400 }
401 
402 static int
403 nvme_tcp_read_data(struct spdk_sock *sock, int bytes,
404 		   void *buf)
405 {
406 	int ret;
407 
408 	ret = spdk_sock_recv(sock, buf, bytes);
409 
410 	if (ret > 0) {
411 		return ret;
412 	}
413 
414 	if (ret < 0) {
415 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
416 			return 0;
417 		}
418 
419 		/* For connect reset issue, do not output error log */
420 		if (errno != ECONNRESET) {
421 			SPDK_ERRLOG("spdk_sock_recv() failed, errno %d: %s\n",
422 				    errno, spdk_strerror(errno));
423 		}
424 	}
425 
426 	/* connection closed */
427 	return NVME_TCP_CONNECTION_FATAL;
428 }
429 
430 static int
431 nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
432 {
433 	int ret;
434 
435 	assert(sock != NULL);
436 	if (iov == NULL || iovcnt == 0) {
437 		return 0;
438 	}
439 
440 	if (iovcnt == 1) {
441 		return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
442 	}
443 
444 	ret = spdk_sock_readv(sock, iov, iovcnt);
445 
446 	if (ret > 0) {
447 		return ret;
448 	}
449 
450 	if (ret < 0) {
451 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
452 			return 0;
453 		}
454 
455 		/* For connect reset issue, do not output error log */
456 		if (errno != ECONNRESET) {
457 			SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
458 				    errno, spdk_strerror(errno));
459 		}
460 	}
461 
462 	/* connection closed */
463 	return NVME_TCP_CONNECTION_FATAL;
464 }
465 
466 
467 static int
468 nvme_tcp_read_payload_data(struct spdk_sock *sock, struct nvme_tcp_pdu *pdu)
469 {
470 	struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
471 	int iovcnt;
472 
473 	iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
474 					     pdu->ddgst_enable, NULL);
475 	assert(iovcnt >= 0);
476 
477 	return nvme_tcp_readv_data(sock, iov, iovcnt);
478 }
479 
480 static void
481 _nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
482 {
483 	pdu->data_iov[0].iov_base = data;
484 	pdu->data_iov[0].iov_len = data_len;
485 	pdu->data_iovcnt = 1;
486 }
487 
488 static void
489 nvme_tcp_pdu_set_data(struct nvme_tcp_pdu *pdu, void *data, uint32_t data_len)
490 {
491 	_nvme_tcp_pdu_set_data(pdu, data, data_len);
492 	pdu->data_len = data_len;
493 }
494 
495 static void
496 nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
497 			  struct iovec *iov, int iovcnt,
498 			  uint32_t data_offset, uint32_t data_len)
499 {
500 	uint32_t buf_offset, buf_len, remain_len, len;
501 	uint8_t *buf;
502 	struct spdk_iov_sgl pdu_sgl, buf_sgl;
503 
504 	pdu->data_len = data_len;
505 
506 	if (spdk_likely(!pdu->dif_ctx)) {
507 		buf_offset = data_offset;
508 		buf_len = data_len;
509 	} else {
510 		spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
511 		spdk_dif_get_range_with_md(data_offset, data_len,
512 					   &buf_offset, &buf_len, pdu->dif_ctx);
513 	}
514 
515 	if (iovcnt == 1) {
516 		_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
517 	} else {
518 		spdk_iov_sgl_init(&pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);
519 		spdk_iov_sgl_init(&buf_sgl, iov, iovcnt, 0);
520 
521 		spdk_iov_sgl_advance(&buf_sgl, buf_offset);
522 		remain_len = buf_len;
523 
524 		while (remain_len > 0) {
525 			_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
526 			len = spdk_min(len, remain_len);
527 
528 			spdk_iov_sgl_advance(&buf_sgl, len);
529 			remain_len -= len;
530 
531 			if (!spdk_iov_sgl_append(&pdu_sgl, buf, len)) {
532 				break;
533 			}
534 		}
535 
536 		assert(remain_len == 0);
537 		assert(pdu_sgl.total_size == buf_len);
538 
539 		pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl.iovcnt;
540 	}
541 }
542 
543 static void
544 nvme_tcp_pdu_calc_psh_len(struct nvme_tcp_pdu *pdu, bool hdgst_enable)
545 {
546 	uint8_t psh_len, pdo, padding_len;
547 
548 	psh_len = pdu->hdr.common.hlen;
549 
550 	if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && hdgst_enable) {
551 		pdu->has_hdgst = true;
552 		psh_len += SPDK_NVME_TCP_DIGEST_LEN;
553 	}
554 	if (pdu->hdr.common.plen > psh_len) {
555 		switch (pdu->hdr.common.pdu_type) {
556 		case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD:
557 		case SPDK_NVME_TCP_PDU_TYPE_H2C_DATA:
558 		case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
559 			pdo = pdu->hdr.common.pdo;
560 			padding_len = pdo - psh_len;
561 			if (padding_len > 0) {
562 				psh_len = pdo;
563 			}
564 			break;
565 		default:
566 			/* There is no padding for other PDU types */
567 			break;
568 		}
569 	}
570 
571 	psh_len -= sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
572 	pdu->psh_len = psh_len;
573 }
574 
575 static inline int
576 nvme_tcp_generate_psk_identity(char *out_id, size_t out_id_len, const char *hostnqn,
577 			       const char *subnqn, enum nvme_tcp_cipher_suite tls_cipher_suite)
578 {
579 	int rc;
580 
581 	assert(out_id != NULL);
582 
583 	if (out_id_len < strlen(PSK_ID_PREFIX) + strlen(hostnqn) + strlen(subnqn) + 5) {
584 		SPDK_ERRLOG("Out buffer too small!\n");
585 		return -1;
586 	}
587 
588 	if (tls_cipher_suite == NVME_TCP_CIPHER_AES_128_GCM_SHA256) {
589 		rc = snprintf(out_id, out_id_len, "%s%s %s %s", PSK_ID_PREFIX, "01",
590 			      hostnqn, subnqn);
591 	} else if (tls_cipher_suite == NVME_TCP_CIPHER_AES_256_GCM_SHA384) {
592 		rc = snprintf(out_id, out_id_len, "%s%s %s %s", PSK_ID_PREFIX, "02",
593 			      hostnqn, subnqn);
594 	} else {
595 		SPDK_ERRLOG("Unknown cipher suite requested!\n");
596 		return -EOPNOTSUPP;
597 	}
598 
599 	if (rc < 0) {
600 		SPDK_ERRLOG("Could not generate PSK identity\n");
601 		return -1;
602 	}
603 
604 	return 0;
605 }
606 
/* Hash identifiers of a PSK. nvme_tcp_parse_interchange_psk() compares the
 * numeric hash field of the PSK interchange string against these values, so
 * their numeric values (0, 1, 2) are significant. */
enum nvme_tcp_hash_algorithm {
	NVME_TCP_HASH_ALGORITHM_NONE,
	NVME_TCP_HASH_ALGORITHM_SHA256,
	NVME_TCP_HASH_ALGORITHM_SHA384,
};
612 
613 static inline int
614 nvme_tcp_derive_retained_psk(const uint8_t *psk_in, uint64_t psk_in_size, const char *hostnqn,
615 			     uint8_t *psk_out, uint64_t psk_out_len, enum nvme_tcp_hash_algorithm psk_retained_hash)
616 {
617 	EVP_PKEY_CTX *ctx;
618 	uint64_t digest_len;
619 	uint8_t hkdf_info[NVME_TCP_HKDF_INFO_MAX_LEN] = {};
620 	const char *label = "tls13 HostNQN";
621 	size_t pos, labellen, nqnlen;
622 	const EVP_MD *hash;
623 	int rc, hkdf_info_size;
624 
625 	labellen = strlen(label);
626 	nqnlen = strlen(hostnqn);
627 	assert(nqnlen <= SPDK_NVMF_NQN_MAX_LEN);
628 
629 	*(uint16_t *)&hkdf_info[0] = htons(psk_in_size);
630 	pos = sizeof(uint16_t);
631 	hkdf_info[pos] = (uint8_t)labellen;
632 	pos += sizeof(uint8_t);
633 	memcpy(&hkdf_info[pos], label, labellen);
634 	pos += labellen;
635 	hkdf_info[pos] = (uint8_t)nqnlen;
636 	pos += sizeof(uint8_t);
637 	memcpy(&hkdf_info[pos], hostnqn, nqnlen);
638 	pos += nqnlen;
639 	hkdf_info_size = pos;
640 
641 	switch (psk_retained_hash) {
642 	case NVME_TCP_HASH_ALGORITHM_SHA256:
643 		digest_len = SHA256_DIGEST_LENGTH;
644 		hash = EVP_sha256();
645 		break;
646 	case NVME_TCP_HASH_ALGORITHM_SHA384:
647 		digest_len = SHA384_DIGEST_LENGTH;
648 		hash = EVP_sha384();
649 		break;
650 	default:
651 		SPDK_ERRLOG("Unknown PSK hash requested!\n");
652 		return -EOPNOTSUPP;
653 	}
654 
655 	if (digest_len > psk_out_len) {
656 		SPDK_ERRLOG("Insufficient buffer size for out key!\n");
657 		return -EINVAL;
658 	}
659 
660 	ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
661 	if (!ctx) {
662 		SPDK_ERRLOG("Unable to initialize EVP_PKEY_CTX!\n");
663 		return -ENOMEM;
664 	}
665 
666 	/* EVP_PKEY_* functions returns 1 as a success code and 0 or negative on failure. */
667 	if (EVP_PKEY_derive_init(ctx) != 1) {
668 		SPDK_ERRLOG("Unable to initialize key derivation ctx for HKDF!\n");
669 		rc = -ENOMEM;
670 		goto end;
671 	}
672 	if (EVP_PKEY_CTX_set_hkdf_md(ctx, hash) != 1) {
673 		SPDK_ERRLOG("Unable to set hash for HKDF!\n");
674 		rc = -EOPNOTSUPP;
675 		goto end;
676 	}
677 	if (EVP_PKEY_CTX_set1_hkdf_key(ctx, psk_in, psk_in_size) != 1) {
678 		SPDK_ERRLOG("Unable to set PSK key for HKDF!\n");
679 		rc = -ENOBUFS;
680 		goto end;
681 	}
682 
683 	if (EVP_PKEY_CTX_add1_hkdf_info(ctx, hkdf_info, hkdf_info_size) != 1) {
684 		SPDK_ERRLOG("Unable to set info label for HKDF!\n");
685 		rc = -ENOBUFS;
686 		goto end;
687 	}
688 	if (EVP_PKEY_CTX_set1_hkdf_salt(ctx, NULL, 0) != 1) {
689 		SPDK_ERRLOG("Unable to set salt for HKDF!\n");
690 		rc = -EINVAL;
691 		goto end;
692 	}
693 	if (EVP_PKEY_derive(ctx, psk_out, &digest_len) != 1) {
694 		SPDK_ERRLOG("Unable to derive the PSK key!\n");
695 		rc = -EINVAL;
696 		goto end;
697 	}
698 
699 	rc = digest_len;
700 
701 end:
702 	EVP_PKEY_CTX_free(ctx);
703 	return rc;
704 }
705 
706 static inline int
707 nvme_tcp_derive_tls_psk(const uint8_t *psk_in, uint64_t psk_in_size, const char *psk_identity,
708 			uint8_t *psk_out, uint64_t psk_out_size, enum nvme_tcp_cipher_suite tls_cipher_suite)
709 {
710 	EVP_PKEY_CTX *ctx;
711 	uint64_t digest_len = 0;
712 	char hkdf_info[NVME_TCP_HKDF_INFO_MAX_LEN] = {};
713 	const char *label = "tls13 nvme-tls-psk";
714 	size_t pos, labellen, idlen;
715 	const EVP_MD *hash;
716 	int rc, hkdf_info_size;
717 
718 	if (tls_cipher_suite == NVME_TCP_CIPHER_AES_128_GCM_SHA256) {
719 		digest_len = SHA256_DIGEST_LENGTH;
720 		hash = EVP_sha256();
721 	} else if (tls_cipher_suite == NVME_TCP_CIPHER_AES_256_GCM_SHA384) {
722 		digest_len = SHA384_DIGEST_LENGTH;
723 		hash = EVP_sha384();
724 	} else {
725 		SPDK_ERRLOG("Unknown cipher suite requested!\n");
726 		return -EOPNOTSUPP;
727 	}
728 
729 	labellen = strlen(label);
730 	idlen = strlen(psk_identity);
731 	if (idlen > UINT8_MAX) {
732 		SPDK_ERRLOG("Invalid PSK ID: too long\n");
733 		return -1;
734 	}
735 
736 	*(uint16_t *)&hkdf_info[0] = htons(psk_in_size);
737 	pos = sizeof(uint16_t);
738 	hkdf_info[pos] = (uint8_t)labellen;
739 	pos += sizeof(uint8_t);
740 	memcpy(&hkdf_info[pos], label, labellen);
741 	pos += labellen;
742 	hkdf_info[pos] = (uint8_t)idlen;
743 	pos += sizeof(uint8_t);
744 	memcpy(&hkdf_info[pos], psk_identity, idlen);
745 	pos += idlen;
746 	hkdf_info_size = pos;
747 
748 	if (digest_len > psk_out_size) {
749 		SPDK_ERRLOG("Insufficient buffer size for out key!\n");
750 		return -1;
751 	}
752 
753 	ctx = EVP_PKEY_CTX_new_id(EVP_PKEY_HKDF, NULL);
754 	if (!ctx) {
755 		SPDK_ERRLOG("Unable to initialize EVP_PKEY_CTX!\n");
756 		return -1;
757 	}
758 
759 	if (EVP_PKEY_derive_init(ctx) != 1) {
760 		SPDK_ERRLOG("Unable to initialize key derivation ctx for HKDF!\n");
761 		rc = -ENOMEM;
762 		goto end;
763 	}
764 	if (EVP_PKEY_CTX_set_hkdf_md(ctx, hash) != 1) {
765 		SPDK_ERRLOG("Unable to set hash method for HKDF!\n");
766 		rc = -EOPNOTSUPP;
767 		goto end;
768 	}
769 	if (EVP_PKEY_CTX_set1_hkdf_key(ctx, psk_in, psk_in_size) != 1) {
770 		SPDK_ERRLOG("Unable to set PSK key for HKDF!\n");
771 		rc = -ENOBUFS;
772 		goto end;
773 	}
774 	if (EVP_PKEY_CTX_add1_hkdf_info(ctx, hkdf_info, hkdf_info_size) != 1) {
775 		SPDK_ERRLOG("Unable to set info label for HKDF!\n");
776 		rc = -ENOBUFS;
777 		goto end;
778 	}
779 	if (EVP_PKEY_CTX_set1_hkdf_salt(ctx, NULL, 0) != 1) {
780 		SPDK_ERRLOG("Unable to set salt for HKDF!\n");
781 		rc = -EINVAL;
782 		goto end;
783 	}
784 	if (EVP_PKEY_derive(ctx, psk_out, &digest_len) != 1) {
785 		SPDK_ERRLOG("Unable to derive the PSK key!\n");
786 		rc = -EINVAL;
787 		goto end;
788 	}
789 
790 	rc = digest_len;
791 
792 end:
793 	EVP_PKEY_CTX_free(ctx);
794 	return rc;
795 }
796 
797 static inline int
798 nvme_tcp_parse_interchange_psk(const char *psk_in, uint8_t *psk_out, size_t psk_out_size,
799 			       uint64_t *psk_out_decoded_size, uint8_t *hash)
800 {
801 	const char *delim = ":";
802 	char psk_cpy[SPDK_TLS_PSK_MAX_LEN] = {};
803 	char *sp = NULL;
804 	uint8_t psk_base64_decoded[SPDK_TLS_PSK_MAX_LEN] = {};
805 	uint64_t psk_configured_size = 0;
806 	uint32_t crc32_calc, crc32;
807 	char *psk_base64;
808 	uint64_t psk_base64_decoded_size = 0;
809 	int rc;
810 
811 	/* Verify PSK format. */
812 	if (sscanf(psk_in, "NVMeTLSkey-1:%02hhx:", hash) != 1 || psk_in[strlen(psk_in) - 1] != delim[0]) {
813 		SPDK_ERRLOG("Invalid format of PSK interchange!\n");
814 		return -EINVAL;
815 	}
816 
817 	if (strlen(psk_in) >= SPDK_TLS_PSK_MAX_LEN) {
818 		SPDK_ERRLOG("PSK interchange exceeds maximum %d characters!\n", SPDK_TLS_PSK_MAX_LEN);
819 		return -EINVAL;
820 	}
821 	if (*hash != NVME_TCP_HASH_ALGORITHM_NONE && *hash != NVME_TCP_HASH_ALGORITHM_SHA256 &&
822 	    *hash != NVME_TCP_HASH_ALGORITHM_SHA384) {
823 		SPDK_ERRLOG("Invalid PSK length!\n");
824 		return -EINVAL;
825 	}
826 
827 	/* Check provided hash function string. */
828 	memcpy(psk_cpy, psk_in, strlen(psk_in));
829 	strtok_r(psk_cpy, delim, &sp);
830 	strtok_r(NULL, delim, &sp);
831 
832 	psk_base64 = strtok_r(NULL, delim, &sp);
833 	if (psk_base64 == NULL) {
834 		SPDK_ERRLOG("Could not get base64 string from PSK interchange!\n");
835 		return -EINVAL;
836 	}
837 
838 	rc = spdk_base64_decode(psk_base64_decoded, &psk_base64_decoded_size, psk_base64);
839 	if (rc) {
840 		SPDK_ERRLOG("Could not decode base64 PSK!\n");
841 		return -EINVAL;
842 	}
843 
844 	switch (*hash) {
845 	case NVME_TCP_HASH_ALGORITHM_SHA256:
846 		psk_configured_size = SHA256_DIGEST_LENGTH;
847 		break;
848 	case NVME_TCP_HASH_ALGORITHM_SHA384:
849 		psk_configured_size = SHA384_DIGEST_LENGTH;
850 		break;
851 	case NVME_TCP_HASH_ALGORITHM_NONE:
852 		if (psk_base64_decoded_size == SHA256_DIGEST_LENGTH + SPDK_CRC32_SIZE_BYTES) {
853 			psk_configured_size = SHA256_DIGEST_LENGTH;
854 		} else if (psk_base64_decoded_size == SHA384_DIGEST_LENGTH + SPDK_CRC32_SIZE_BYTES) {
855 			psk_configured_size = SHA384_DIGEST_LENGTH;
856 		}
857 		break;
858 	default:
859 		SPDK_ERRLOG("Invalid key: unsupported key hash\n");
860 		assert(0);
861 		return -EINVAL;
862 	}
863 	if (psk_base64_decoded_size != psk_configured_size + SPDK_CRC32_SIZE_BYTES) {
864 		SPDK_ERRLOG("Invalid key: unsupported key length\n");
865 		return -EINVAL;
866 	}
867 
868 	crc32 = from_le32(&psk_base64_decoded[psk_configured_size]);
869 
870 	crc32_calc = spdk_crc32_ieee_update(psk_base64_decoded, psk_configured_size, ~0);
871 	crc32_calc = ~crc32_calc;
872 
873 	if (crc32 != crc32_calc) {
874 		SPDK_ERRLOG("CRC-32 checksums do not match!\n");
875 		return -EINVAL;
876 	}
877 
878 	if (psk_configured_size > psk_out_size) {
879 		SPDK_ERRLOG("Insufficient buffer size: %lu for configured PSK of size: %lu!\n",
880 			    psk_out_size, psk_configured_size);
881 		return -ENOBUFS;
882 	}
883 	memcpy(psk_out, psk_base64_decoded, psk_configured_size);
884 	*psk_out_decoded_size = psk_configured_size;
885 
886 	return rc;
887 }
888 
889 #endif /* SPDK_INTERNAL_NVME_TCP_H */
890