xref: /freebsd-src/lib/libnvmf/nvmf_tcp.c (revision 8bba2c0f8958443790b1f3abc0675719da987e87)
12da066efSJohn Baldwin /*-
22da066efSJohn Baldwin  * SPDX-License-Identifier: BSD-2-Clause
32da066efSJohn Baldwin  *
42da066efSJohn Baldwin  * Copyright (c) 2022-2024 Chelsio Communications, Inc.
52da066efSJohn Baldwin  * Written by: John Baldwin <jhb@FreeBSD.org>
62da066efSJohn Baldwin  */
72da066efSJohn Baldwin 
82da066efSJohn Baldwin #include <sys/endian.h>
92da066efSJohn Baldwin #include <sys/gsb_crc32.h>
102da066efSJohn Baldwin #include <sys/queue.h>
11*8bba2c0fSJohn Baldwin #include <sys/socket.h>
122da066efSJohn Baldwin #include <sys/uio.h>
132da066efSJohn Baldwin #include <assert.h>
142da066efSJohn Baldwin #include <errno.h>
15*8bba2c0fSJohn Baldwin #include <netdb.h>
162da066efSJohn Baldwin #include <stdio.h>
172da066efSJohn Baldwin #include <stdlib.h>
182da066efSJohn Baldwin #include <string.h>
192da066efSJohn Baldwin #include <unistd.h>
202da066efSJohn Baldwin 
212da066efSJohn Baldwin #include "libnvmf.h"
222da066efSJohn Baldwin #include "internal.h"
232da066efSJohn Baldwin #include "nvmf_tcp.h"
242da066efSJohn Baldwin 
252da066efSJohn Baldwin struct nvmf_tcp_qpair;
262da066efSJohn Baldwin 
/*
 * Describes a caller-supplied data region associated with a command.
 * Buffers live on one of a queue pair's two lists and are located by
 * their (cid, ttag) pair.
 */
struct nvmf_tcp_command_buffer {
	/* Queue pair the buffer was allocated for. */
	struct nvmf_tcp_qpair *qp;

	/* Memory region and its size in bytes. */
	void *data;
	size_t data_len;
	/* Running total of bytes moved through this buffer so far. */
	size_t data_xfered;
	/* Offset that data[0] corresponds to in PDU data offsets. */
	uint32_t data_offset;

	/* Lookup keys. */
	uint16_t cid, ttag;

	/* Linkage on a queue pair's tx_buffers or rx_buffers list. */
	LIST_ENTRY(nvmf_tcp_command_buffer) link;
};

/* Head type for the per-queue-pair command buffer lists. */
LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer);
422da066efSJohn Baldwin 
432da066efSJohn Baldwin struct nvmf_tcp_association {
442da066efSJohn Baldwin 	struct nvmf_association na;
452da066efSJohn Baldwin 
462da066efSJohn Baldwin 	uint32_t ioccsz;
472da066efSJohn Baldwin };
482da066efSJohn Baldwin 
/*
 * A PDU received from the socket.
 */
struct nvmf_tcp_rxpdu {
	/*
	 * Start of the PDU.  nvmf_tcp_read_pdu() allocates this with
	 * malloc(); nvmf_tcp_free_pdu() releases it.
	 */
	struct nvme_tcp_common_pdu_hdr *hdr;

	/* Data length recorded by nvmf_tcp_validate_pdu() on success. */
	uint32_t data_len;
};
532da066efSJohn Baldwin 
542da066efSJohn Baldwin struct nvmf_tcp_capsule {
552da066efSJohn Baldwin 	struct nvmf_capsule nc;
562da066efSJohn Baldwin 
572da066efSJohn Baldwin 	struct nvmf_tcp_rxpdu rx_pdu;
582da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
592da066efSJohn Baldwin 
602da066efSJohn Baldwin 	TAILQ_ENTRY(nvmf_tcp_capsule) link;
612da066efSJohn Baldwin };
622da066efSJohn Baldwin 
632da066efSJohn Baldwin struct nvmf_tcp_qpair {
642da066efSJohn Baldwin 	struct nvmf_qpair qp;
652da066efSJohn Baldwin 	int s;
662da066efSJohn Baldwin 
672da066efSJohn Baldwin 	uint8_t	txpda;
682da066efSJohn Baldwin 	uint8_t rxpda;
692da066efSJohn Baldwin 	bool header_digests;
702da066efSJohn Baldwin 	bool data_digests;
712da066efSJohn Baldwin 	uint32_t maxr2t;
722da066efSJohn Baldwin 	uint32_t maxh2cdata;
732da066efSJohn Baldwin 	uint32_t max_icd;	/* Host only */
742da066efSJohn Baldwin 	uint16_t next_ttag;	/* Controller only */
752da066efSJohn Baldwin 
762da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list tx_buffers;
772da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list rx_buffers;
782da066efSJohn Baldwin 	TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules;
792da066efSJohn Baldwin };
802da066efSJohn Baldwin 
/*
 * Downcasts from the generic libnvmf objects to their TCP-specific
 * containers.  Each is valid because the generic object is the first
 * member of the corresponding TCP structure.
 */
#define	TASSOC(na)	((struct nvmf_tcp_association *)(na))
#define	TCAP(nc)	((struct nvmf_tcp_capsule *)(nc))
#define	CTCAP(nc)	((const struct nvmf_tcp_capsule *)(nc))
#define	TQP(qp)		((struct nvmf_tcp_qpair *)(qp))
852da066efSJohn Baldwin 
862da066efSJohn Baldwin static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET];
872da066efSJohn Baldwin 
882da066efSJohn Baldwin static uint32_t
892da066efSJohn Baldwin compute_digest(const void *buf, size_t len)
902da066efSJohn Baldwin {
912da066efSJohn Baldwin 	return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff);
922da066efSJohn Baldwin }
932da066efSJohn Baldwin 
942da066efSJohn Baldwin static struct nvmf_tcp_command_buffer *
952da066efSJohn Baldwin tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data,
962da066efSJohn Baldwin     uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag,
972da066efSJohn Baldwin     bool receive)
982da066efSJohn Baldwin {
992da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
1002da066efSJohn Baldwin 
1012da066efSJohn Baldwin 	cb = malloc(sizeof(*cb));
1022da066efSJohn Baldwin 	cb->qp = qp;
1032da066efSJohn Baldwin 	cb->data = data;
1042da066efSJohn Baldwin 	cb->data_offset = data_offset;
1052da066efSJohn Baldwin 	cb->data_len = data_len;
1062da066efSJohn Baldwin 	cb->data_xfered = 0;
1072da066efSJohn Baldwin 	cb->cid = cid;
1082da066efSJohn Baldwin 	cb->ttag = ttag;
1092da066efSJohn Baldwin 
1102da066efSJohn Baldwin 	if (receive)
1112da066efSJohn Baldwin 		LIST_INSERT_HEAD(&qp->rx_buffers, cb, link);
1122da066efSJohn Baldwin 	else
1132da066efSJohn Baldwin 		LIST_INSERT_HEAD(&qp->tx_buffers, cb, link);
1142da066efSJohn Baldwin 	return (cb);
1152da066efSJohn Baldwin }
1162da066efSJohn Baldwin 
1172da066efSJohn Baldwin static struct nvmf_tcp_command_buffer *
1182da066efSJohn Baldwin tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
1192da066efSJohn Baldwin     bool receive)
1202da066efSJohn Baldwin {
1212da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list *list;
1222da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
1232da066efSJohn Baldwin 
1242da066efSJohn Baldwin 	list = receive ? &qp->rx_buffers : &qp->tx_buffers;
1252da066efSJohn Baldwin 	LIST_FOREACH(cb, list, link) {
1262da066efSJohn Baldwin 		if (cb->cid == cid && cb->ttag == ttag)
1272da066efSJohn Baldwin 			return (cb);
1282da066efSJohn Baldwin 	}
1292da066efSJohn Baldwin 	return (NULL);
1302da066efSJohn Baldwin }
1312da066efSJohn Baldwin 
1322da066efSJohn Baldwin static void
1332da066efSJohn Baldwin tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
1342da066efSJohn Baldwin     bool receive)
1352da066efSJohn Baldwin {
1362da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
1372da066efSJohn Baldwin 
1382da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, cid, ttag, receive);
1392da066efSJohn Baldwin 	if (cb != NULL)
1402da066efSJohn Baldwin 		LIST_REMOVE(cb, link);
1412da066efSJohn Baldwin }
1422da066efSJohn Baldwin 
1432da066efSJohn Baldwin static void
1442da066efSJohn Baldwin tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
1452da066efSJohn Baldwin {
1462da066efSJohn Baldwin 	LIST_REMOVE(cb, link);
1472da066efSJohn Baldwin 	free(cb);
1482da066efSJohn Baldwin }
1492da066efSJohn Baldwin 
1502da066efSJohn Baldwin static int
1512da066efSJohn Baldwin nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len)
1522da066efSJohn Baldwin {
1532da066efSJohn Baldwin 	ssize_t nwritten;
1542da066efSJohn Baldwin 	const char *cp;
1552da066efSJohn Baldwin 
1562da066efSJohn Baldwin 	cp = pdu;
1572da066efSJohn Baldwin 	while (len != 0) {
1582da066efSJohn Baldwin 		nwritten = write(qp->s, cp, len);
1592da066efSJohn Baldwin 		if (nwritten < 0)
1602da066efSJohn Baldwin 			return (errno);
1612da066efSJohn Baldwin 		len -= nwritten;
1622da066efSJohn Baldwin 		cp += nwritten;
1632da066efSJohn Baldwin 	}
1642da066efSJohn Baldwin 	return (0);
1652da066efSJohn Baldwin }
1662da066efSJohn Baldwin 
1672da066efSJohn Baldwin static int
1682da066efSJohn Baldwin nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov,
1692da066efSJohn Baldwin     u_int iovcnt, size_t len)
1702da066efSJohn Baldwin {
1712da066efSJohn Baldwin 	ssize_t nwritten;
1722da066efSJohn Baldwin 
1732da066efSJohn Baldwin 	for (;;) {
1742da066efSJohn Baldwin 		nwritten = writev(qp->s, iov, iovcnt);
1752da066efSJohn Baldwin 		if (nwritten < 0)
1762da066efSJohn Baldwin 			return (errno);
1772da066efSJohn Baldwin 
1782da066efSJohn Baldwin 		len -= nwritten;
1792da066efSJohn Baldwin 		if (len == 0)
1802da066efSJohn Baldwin 			return (0);
1812da066efSJohn Baldwin 
1822da066efSJohn Baldwin 		while (iov->iov_len <= (size_t)nwritten) {
1832da066efSJohn Baldwin 			nwritten -= iov->iov_len;
1842da066efSJohn Baldwin 			iovcnt--;
1852da066efSJohn Baldwin 			iov++;
1862da066efSJohn Baldwin 		}
1872da066efSJohn Baldwin 
1882da066efSJohn Baldwin 		iov->iov_base = (char *)iov->iov_base + nwritten;
1892da066efSJohn Baldwin 		iov->iov_len -= nwritten;
1902da066efSJohn Baldwin 	}
1912da066efSJohn Baldwin }
1922da066efSJohn Baldwin 
1932da066efSJohn Baldwin static void
1942da066efSJohn Baldwin nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
1952da066efSJohn Baldwin     uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen)
1962da066efSJohn Baldwin {
1972da066efSJohn Baldwin 	struct nvme_tcp_term_req_hdr hdr;
1982da066efSJohn Baldwin 	struct iovec iov[2];
1992da066efSJohn Baldwin 
2002da066efSJohn Baldwin 	if (hlen != 0) {
2012da066efSJohn Baldwin 		if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
2022da066efSJohn Baldwin 			hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
2032da066efSJohn Baldwin 		if (hlen > pdu_len)
2042da066efSJohn Baldwin 			hlen = pdu_len;
2052da066efSJohn Baldwin 	}
2062da066efSJohn Baldwin 
2072da066efSJohn Baldwin 	memset(&hdr, 0, sizeof(hdr));
2082da066efSJohn Baldwin 	hdr.common.pdu_type = na->na_controller ?
2092da066efSJohn Baldwin 	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
2102da066efSJohn Baldwin 	hdr.common.hlen = sizeof(hdr);
2112da066efSJohn Baldwin 	hdr.common.plen = sizeof(hdr) + hlen;
2122da066efSJohn Baldwin 	hdr.fes = htole16(fes);
2132da066efSJohn Baldwin 	le32enc(hdr.fei, fei);
2142da066efSJohn Baldwin 	iov[0].iov_base = &hdr;
2152da066efSJohn Baldwin 	iov[0].iov_len = sizeof(hdr);
2162da066efSJohn Baldwin 	iov[1].iov_base = __DECONST(void *, rx_pdu);
2172da066efSJohn Baldwin 	iov[1].iov_len = hlen;
2182da066efSJohn Baldwin 
2192da066efSJohn Baldwin 	(void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen);
2202da066efSJohn Baldwin 	close(qp->s);
2212da066efSJohn Baldwin 	qp->s = -1;
2222da066efSJohn Baldwin }
2232da066efSJohn Baldwin 
2242da066efSJohn Baldwin static int
2252da066efSJohn Baldwin nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu,
2262da066efSJohn Baldwin     size_t pdu_len)
2272da066efSJohn Baldwin {
2282da066efSJohn Baldwin 	const struct nvme_tcp_common_pdu_hdr *ch;
2292da066efSJohn Baldwin 	uint32_t data_len, fei, plen;
2302da066efSJohn Baldwin 	uint32_t digest, rx_digest;
2312da066efSJohn Baldwin 	u_int hlen;
2322da066efSJohn Baldwin 	int error;
2332da066efSJohn Baldwin 	uint16_t fes;
2342da066efSJohn Baldwin 
2352da066efSJohn Baldwin 	/* Determine how large of a PDU header to return for errors. */
2362da066efSJohn Baldwin 	ch = pdu->hdr;
2372da066efSJohn Baldwin 	hlen = ch->hlen;
2382da066efSJohn Baldwin 	plen = le32toh(ch->plen);
2392da066efSJohn Baldwin 	if (hlen < sizeof(*ch) || hlen > plen)
2402da066efSJohn Baldwin 		hlen = sizeof(*ch);
2412da066efSJohn Baldwin 
2422da066efSJohn Baldwin 	error = nvmf_tcp_validate_pdu_header(ch,
2432da066efSJohn Baldwin 	    qp->qp.nq_association->na_controller, qp->header_digests,
2442da066efSJohn Baldwin 	    qp->data_digests, qp->rxpda, &data_len, &fes, &fei);
2452da066efSJohn Baldwin 	if (error != 0) {
2462da066efSJohn Baldwin 		if (error == ECONNRESET) {
2472da066efSJohn Baldwin 			close(qp->s);
2482da066efSJohn Baldwin 			qp->s = -1;
2492da066efSJohn Baldwin 		} else {
2502da066efSJohn Baldwin 			nvmf_tcp_report_error(qp->qp.nq_association, qp,
2512da066efSJohn Baldwin 			    fes, fei, ch, pdu_len, hlen);
2522da066efSJohn Baldwin 		}
2532da066efSJohn Baldwin 		return (error);
2542da066efSJohn Baldwin 	}
2552da066efSJohn Baldwin 
2562da066efSJohn Baldwin 	/* Check header digest if present. */
2572da066efSJohn Baldwin 	if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
2582da066efSJohn Baldwin 		digest = compute_digest(ch, ch->hlen);
2592da066efSJohn Baldwin 		memcpy(&rx_digest, (const char *)ch + ch->hlen,
2602da066efSJohn Baldwin 		    sizeof(rx_digest));
2612da066efSJohn Baldwin 		if (digest != rx_digest) {
2622da066efSJohn Baldwin 			printf("NVMe/TCP: Header digest mismatch\n");
2632da066efSJohn Baldwin 			nvmf_tcp_report_error(qp->qp.nq_association, qp,
2642da066efSJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch,
2652da066efSJohn Baldwin 			    pdu_len, hlen);
2662da066efSJohn Baldwin 			return (EBADMSG);
2672da066efSJohn Baldwin 		}
2682da066efSJohn Baldwin 	}
2692da066efSJohn Baldwin 
2702da066efSJohn Baldwin 	/* Check data digest if present. */
2712da066efSJohn Baldwin 	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
2722da066efSJohn Baldwin 		digest = compute_digest((const char *)ch + ch->pdo, data_len);
2732da066efSJohn Baldwin 		memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest),
2742da066efSJohn Baldwin 		    sizeof(rx_digest));
2752da066efSJohn Baldwin 		if (digest != rx_digest) {
2762da066efSJohn Baldwin 			printf("NVMe/TCP: Data digest mismatch\n");
2772da066efSJohn Baldwin 			return (EBADMSG);
2782da066efSJohn Baldwin 		}
2792da066efSJohn Baldwin 	}
2802da066efSJohn Baldwin 
2812da066efSJohn Baldwin 	pdu->data_len = data_len;
2822da066efSJohn Baldwin 	return (0);
2832da066efSJohn Baldwin }
2842da066efSJohn Baldwin 
/*
 * Fill 'buf' with exactly 'len' bytes from descriptor 's', issuing as
 * many read() calls as needed.
 *
 * Returns 0 once all bytes have arrived, the errno value if a read()
 * fails, or ECONNRESET if end-of-file is hit before 'len' bytes have
 * been read.
 */
static int
nvmf_tcp_read_buffer(int s, void *buf, size_t len)
{
	char *dst = buf;
	size_t remaining = len;

	while (remaining > 0) {
		ssize_t got = read(s, dst, remaining);

		if (got == 0)
			return (ECONNRESET);
		if (got < 0)
			return (errno);
		dst += got;
		remaining -= (size_t)got;
	}
	return (0);
}
3072da066efSJohn Baldwin 
3082da066efSJohn Baldwin static int
3092da066efSJohn Baldwin nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
3102da066efSJohn Baldwin {
3112da066efSJohn Baldwin 	struct nvme_tcp_common_pdu_hdr ch;
3122da066efSJohn Baldwin 	uint32_t plen;
3132da066efSJohn Baldwin 	int error;
3142da066efSJohn Baldwin 
3152da066efSJohn Baldwin 	memset(pdu, 0, sizeof(*pdu));
3162da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch));
3172da066efSJohn Baldwin 	if (error != 0)
3182da066efSJohn Baldwin 		return (error);
3192da066efSJohn Baldwin 
3202da066efSJohn Baldwin 	plen = le32toh(ch.plen);
3212da066efSJohn Baldwin 
3222da066efSJohn Baldwin 	/*
3232da066efSJohn Baldwin 	 * Validate a header with garbage lengths to trigger
3242da066efSJohn Baldwin 	 * an error message without reading more.
3252da066efSJohn Baldwin 	 */
3262da066efSJohn Baldwin 	if (plen < sizeof(ch) || ch.hlen > plen) {
3272da066efSJohn Baldwin 		pdu->hdr = &ch;
3282da066efSJohn Baldwin 		error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch));
3292da066efSJohn Baldwin 		pdu->hdr = NULL;
3302da066efSJohn Baldwin 		assert(error != 0);
3312da066efSJohn Baldwin 		return (error);
3322da066efSJohn Baldwin 	}
3332da066efSJohn Baldwin 
3342da066efSJohn Baldwin 	/* Read the rest of the PDU. */
3352da066efSJohn Baldwin 	pdu->hdr = malloc(plen);
3362da066efSJohn Baldwin 	memcpy(pdu->hdr, &ch, sizeof(ch));
3372da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch));
3382da066efSJohn Baldwin 	if (error != 0)
3392da066efSJohn Baldwin 		return (error);
3402da066efSJohn Baldwin 	error = nvmf_tcp_validate_pdu(qp, pdu, plen);
3412da066efSJohn Baldwin 	if (error != 0) {
3422da066efSJohn Baldwin 		free(pdu->hdr);
3432da066efSJohn Baldwin 		pdu->hdr = NULL;
3442da066efSJohn Baldwin 	}
3452da066efSJohn Baldwin 	return (error);
3462da066efSJohn Baldwin }
3472da066efSJohn Baldwin 
3482da066efSJohn Baldwin static void
3492da066efSJohn Baldwin nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
3502da066efSJohn Baldwin {
3512da066efSJohn Baldwin 	free(pdu->hdr);
3522da066efSJohn Baldwin 	pdu->hdr = NULL;
3532da066efSJohn Baldwin }
3542da066efSJohn Baldwin 
3552da066efSJohn Baldwin static int
3562da066efSJohn Baldwin nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
3572da066efSJohn Baldwin {
3582da066efSJohn Baldwin 	struct nvme_tcp_term_req_hdr *hdr;
3592da066efSJohn Baldwin 
3602da066efSJohn Baldwin 	hdr = (void *)pdu->hdr;
3612da066efSJohn Baldwin 
3622da066efSJohn Baldwin 	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
3632da066efSJohn Baldwin 	    le16toh(hdr->fes), le32dec(hdr->fei));
3642da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
3652da066efSJohn Baldwin 	return (ECONNRESET);
3662da066efSJohn Baldwin }
3672da066efSJohn Baldwin 
3682da066efSJohn Baldwin static int
3692da066efSJohn Baldwin nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
3702da066efSJohn Baldwin     struct nvmf_tcp_rxpdu *pdu)
3712da066efSJohn Baldwin {
3722da066efSJohn Baldwin 	struct nvme_tcp_cmd *cmd;
3732da066efSJohn Baldwin 	struct nvmf_capsule *nc;
3742da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
3752da066efSJohn Baldwin 
3762da066efSJohn Baldwin 	cmd = (void *)pdu->hdr;
3772da066efSJohn Baldwin 
3782da066efSJohn Baldwin 	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe);
3792da066efSJohn Baldwin 	if (nc == NULL)
3802da066efSJohn Baldwin 		return (ENOMEM);
3812da066efSJohn Baldwin 
3822da066efSJohn Baldwin 	tc = TCAP(nc);
3832da066efSJohn Baldwin 	tc->rx_pdu = *pdu;
3842da066efSJohn Baldwin 
3852da066efSJohn Baldwin 	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
3862da066efSJohn Baldwin 	return (0);
3872da066efSJohn Baldwin }
3882da066efSJohn Baldwin 
3892da066efSJohn Baldwin static int
3902da066efSJohn Baldwin nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
3912da066efSJohn Baldwin     struct nvmf_tcp_rxpdu *pdu)
3922da066efSJohn Baldwin {
3932da066efSJohn Baldwin 	struct nvme_tcp_rsp *rsp;
3942da066efSJohn Baldwin 	struct nvmf_capsule *nc;
3952da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
3962da066efSJohn Baldwin 
3972da066efSJohn Baldwin 	rsp = (void *)pdu->hdr;
3982da066efSJohn Baldwin 
3992da066efSJohn Baldwin 	nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe);
4002da066efSJohn Baldwin 	if (nc == NULL)
4012da066efSJohn Baldwin 		return (ENOMEM);
4022da066efSJohn Baldwin 
4032da066efSJohn Baldwin 	nc->nc_sqhd_valid = true;
4042da066efSJohn Baldwin 	tc = TCAP(nc);
4052da066efSJohn Baldwin 	tc->rx_pdu = *pdu;
4062da066efSJohn Baldwin 
4072da066efSJohn Baldwin 	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
4082da066efSJohn Baldwin 
4092da066efSJohn Baldwin 	/*
4102da066efSJohn Baldwin 	 * Once the CQE has been received, no further transfers to the
4112da066efSJohn Baldwin 	 * command buffer for the associated CID can occur.
4122da066efSJohn Baldwin 	 */
4132da066efSJohn Baldwin 	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true);
4142da066efSJohn Baldwin 	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false);
4152da066efSJohn Baldwin 
4162da066efSJohn Baldwin 	return (0);
4172da066efSJohn Baldwin }
4182da066efSJohn Baldwin 
4192da066efSJohn Baldwin /*
4202da066efSJohn Baldwin  * Construct and send a PDU that contains an optional data payload.
4212da066efSJohn Baldwin  * This includes dealing with digests and the length fields in the
4222da066efSJohn Baldwin  * common header.
4232da066efSJohn Baldwin  */
4242da066efSJohn Baldwin static int
4252da066efSJohn Baldwin nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
4262da066efSJohn Baldwin     void *data, uint32_t data_len)
4272da066efSJohn Baldwin {
4282da066efSJohn Baldwin 	struct nvme_tcp_common_pdu_hdr *ch;
4292da066efSJohn Baldwin 	struct iovec iov[5];
4302da066efSJohn Baldwin 	u_int iovcnt;
4312da066efSJohn Baldwin 	uint32_t header_digest, data_digest, pad, pdo, plen;
4322da066efSJohn Baldwin 
4332da066efSJohn Baldwin 	plen = hlen;
4342da066efSJohn Baldwin 	if (qp->header_digests)
4352da066efSJohn Baldwin 		plen += sizeof(header_digest);
4362da066efSJohn Baldwin 	if (data_len != 0) {
43706b2ed7aSJohn Baldwin 		pdo = roundup(plen, qp->txpda);
4382da066efSJohn Baldwin 		pad = pdo - plen;
4392da066efSJohn Baldwin 		plen = pdo + data_len;
4402da066efSJohn Baldwin 		if (qp->data_digests)
4412da066efSJohn Baldwin 			plen += sizeof(data_digest);
4422da066efSJohn Baldwin 	} else {
4432da066efSJohn Baldwin 		assert(data == NULL);
4442da066efSJohn Baldwin 		pdo = 0;
4452da066efSJohn Baldwin 		pad = 0;
4462da066efSJohn Baldwin 	}
4472da066efSJohn Baldwin 
4482da066efSJohn Baldwin 	ch = hdr;
4492da066efSJohn Baldwin 	ch->hlen = hlen;
4502da066efSJohn Baldwin 	if (qp->header_digests)
4512da066efSJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
4522da066efSJohn Baldwin 	if (qp->data_digests && data_len != 0)
4532da066efSJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
4542da066efSJohn Baldwin 	ch->pdo = pdo;
4552da066efSJohn Baldwin 	ch->plen = htole32(plen);
4562da066efSJohn Baldwin 
4572da066efSJohn Baldwin 	/* CH + PSH */
4582da066efSJohn Baldwin 	iov[0].iov_base = hdr;
4592da066efSJohn Baldwin 	iov[0].iov_len = hlen;
4602da066efSJohn Baldwin 	iovcnt = 1;
4612da066efSJohn Baldwin 
4622da066efSJohn Baldwin 	/* HDGST */
4632da066efSJohn Baldwin 	if (qp->header_digests) {
4642da066efSJohn Baldwin 		header_digest = compute_digest(hdr, hlen);
4652da066efSJohn Baldwin 		iov[iovcnt].iov_base = &header_digest;
4662da066efSJohn Baldwin 		iov[iovcnt].iov_len = sizeof(header_digest);
4672da066efSJohn Baldwin 		iovcnt++;
4682da066efSJohn Baldwin 	}
4692da066efSJohn Baldwin 
4702da066efSJohn Baldwin 	if (pad != 0) {
4712da066efSJohn Baldwin 		/* PAD */
4722da066efSJohn Baldwin 		iov[iovcnt].iov_base = __DECONST(char *, zero_padding);
4732da066efSJohn Baldwin 		iov[iovcnt].iov_len = pad;
4742da066efSJohn Baldwin 		iovcnt++;
4752da066efSJohn Baldwin 	}
4762da066efSJohn Baldwin 
4772da066efSJohn Baldwin 	if (data_len != 0) {
4782da066efSJohn Baldwin 		/* DATA */
4792da066efSJohn Baldwin 		iov[iovcnt].iov_base = data;
4802da066efSJohn Baldwin 		iov[iovcnt].iov_len = data_len;
4812da066efSJohn Baldwin 		iovcnt++;
4822da066efSJohn Baldwin 
4832da066efSJohn Baldwin 		/* DDGST */
4842da066efSJohn Baldwin 		if (qp->data_digests) {
4852da066efSJohn Baldwin 			data_digest = compute_digest(data, data_len);
4862da066efSJohn Baldwin 			iov[iovcnt].iov_base = &data_digest;
4872da066efSJohn Baldwin 			iov[iovcnt].iov_len = sizeof(data_digest);
4882da066efSJohn Baldwin 			iovcnt++;
4892da066efSJohn Baldwin 		}
4902da066efSJohn Baldwin 	}
4912da066efSJohn Baldwin 
4922da066efSJohn Baldwin 	return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen));
4932da066efSJohn Baldwin }
4942da066efSJohn Baldwin 
4952da066efSJohn Baldwin static int
4962da066efSJohn Baldwin nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
4972da066efSJohn Baldwin {
4982da066efSJohn Baldwin 	struct nvme_tcp_h2c_data_hdr *h2c;
4992da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
5002da066efSJohn Baldwin 	uint32_t data_len, data_offset;
5012da066efSJohn Baldwin 	const char *icd;
5022da066efSJohn Baldwin 
5032da066efSJohn Baldwin 	h2c = (void *)pdu->hdr;
5042da066efSJohn Baldwin 	if (le32toh(h2c->datal) > qp->maxh2cdata) {
5052da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5062da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
5072da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5082da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5092da066efSJohn Baldwin 		return (EBADMSG);
5102da066efSJohn Baldwin 	}
5112da066efSJohn Baldwin 
5122da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true);
5132da066efSJohn Baldwin 	if (cb == NULL) {
5142da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5152da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5162da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr,
5172da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5182da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5192da066efSJohn Baldwin 		return (EBADMSG);
5202da066efSJohn Baldwin 	}
5212da066efSJohn Baldwin 
5222da066efSJohn Baldwin 	data_len = le32toh(h2c->datal);
5232da066efSJohn Baldwin 	if (data_len != pdu->data_len) {
5242da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5252da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5262da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr,
5272da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5282da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5292da066efSJohn Baldwin 		return (EBADMSG);
5302da066efSJohn Baldwin 	}
5312da066efSJohn Baldwin 
5322da066efSJohn Baldwin 	data_offset = le32toh(h2c->datao);
5332da066efSJohn Baldwin 	if (data_offset < cb->data_offset ||
5342da066efSJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
5352da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5362da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
5372da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5382da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5392da066efSJohn Baldwin 		return (EBADMSG);
5402da066efSJohn Baldwin 	}
5412da066efSJohn Baldwin 
5422da066efSJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
5432da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5442da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
5452da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5462da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5472da066efSJohn Baldwin 		return (EBADMSG);
5482da066efSJohn Baldwin 	}
5492da066efSJohn Baldwin 
5502da066efSJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
5512da066efSJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
5522da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5532da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
5542da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5552da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5562da066efSJohn Baldwin 		return (EBADMSG);
5572da066efSJohn Baldwin 	}
5582da066efSJohn Baldwin 
5592da066efSJohn Baldwin 	cb->data_xfered += data_len;
5602da066efSJohn Baldwin 	data_offset -= cb->data_offset;
5612da066efSJohn Baldwin 	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
5622da066efSJohn Baldwin 	memcpy((char *)cb->data + data_offset, icd, data_len);
5632da066efSJohn Baldwin 
5642da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
5652da066efSJohn Baldwin 	return (0);
5662da066efSJohn Baldwin }
5672da066efSJohn Baldwin 
5682da066efSJohn Baldwin static int
5692da066efSJohn Baldwin nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
5702da066efSJohn Baldwin {
5712da066efSJohn Baldwin 	struct nvme_tcp_c2h_data_hdr *c2h;
5722da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
5732da066efSJohn Baldwin 	uint32_t data_len, data_offset;
5742da066efSJohn Baldwin 	const char *icd;
5752da066efSJohn Baldwin 
5762da066efSJohn Baldwin 	c2h = (void *)pdu->hdr;
5772da066efSJohn Baldwin 
5782da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true);
5792da066efSJohn Baldwin 	if (cb == NULL) {
5802da066efSJohn Baldwin 		/*
5812da066efSJohn Baldwin 		 * XXX: Could be PDU sequence error if cccid is for a
5822da066efSJohn Baldwin 		 * command that doesn't use a command buffer.
5832da066efSJohn Baldwin 		 */
5842da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5852da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5862da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr,
5872da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5882da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5892da066efSJohn Baldwin 		return (EBADMSG);
5902da066efSJohn Baldwin 	}
5912da066efSJohn Baldwin 
5922da066efSJohn Baldwin 	data_len = le32toh(c2h->datal);
5932da066efSJohn Baldwin 	if (data_len != pdu->data_len) {
5942da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5952da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5962da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr,
5972da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5982da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5992da066efSJohn Baldwin 		return (EBADMSG);
6002da066efSJohn Baldwin 	}
6012da066efSJohn Baldwin 
6022da066efSJohn Baldwin 	data_offset = le32toh(c2h->datao);
6032da066efSJohn Baldwin 	if (data_offset < cb->data_offset ||
6042da066efSJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
6052da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6062da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
6072da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6082da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6092da066efSJohn Baldwin 		return (EBADMSG);
6102da066efSJohn Baldwin 	}
6112da066efSJohn Baldwin 
6122da066efSJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
6132da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6142da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
6152da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6162da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6172da066efSJohn Baldwin 		return (EBADMSG);
6182da066efSJohn Baldwin 	}
6192da066efSJohn Baldwin 
6202da066efSJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
6212da066efSJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
6222da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6232da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
6242da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6252da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6262da066efSJohn Baldwin 		return (EBADMSG);
6272da066efSJohn Baldwin 	}
6282da066efSJohn Baldwin 
6292da066efSJohn Baldwin 	cb->data_xfered += data_len;
6302da066efSJohn Baldwin 	data_offset -= cb->data_offset;
6312da066efSJohn Baldwin 	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
6322da066efSJohn Baldwin 	memcpy((char *)cb->data + data_offset, icd, data_len);
6332da066efSJohn Baldwin 
6342da066efSJohn Baldwin 	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
6352da066efSJohn Baldwin 		struct nvme_completion cqe;
6362da066efSJohn Baldwin 		struct nvmf_tcp_capsule *tc;
6372da066efSJohn Baldwin 		struct nvmf_capsule *nc;
6382da066efSJohn Baldwin 
6392da066efSJohn Baldwin 		memset(&cqe, 0, sizeof(cqe));
6402da066efSJohn Baldwin 		cqe.cid = cb->cid;
6412da066efSJohn Baldwin 
6422da066efSJohn Baldwin 		nc = nvmf_allocate_response(&qp->qp, &cqe);
6432da066efSJohn Baldwin 		if (nc == NULL) {
6442da066efSJohn Baldwin 			nvmf_tcp_free_pdu(pdu);
6452da066efSJohn Baldwin 			return (ENOMEM);
6462da066efSJohn Baldwin 		}
6472da066efSJohn Baldwin 		nc->nc_sqhd_valid = false;
6482da066efSJohn Baldwin 
6492da066efSJohn Baldwin 		tc = TCAP(nc);
6502da066efSJohn Baldwin 		TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
6512da066efSJohn Baldwin 	}
6522da066efSJohn Baldwin 
6532da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
6542da066efSJohn Baldwin 	return (0);
6552da066efSJohn Baldwin }
6562da066efSJohn Baldwin 
6572da066efSJohn Baldwin /* NB: cid and ttag and little-endian already. */
6582da066efSJohn Baldwin static int
6592da066efSJohn Baldwin tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
6602da066efSJohn Baldwin     uint32_t data_offset, void *buf, size_t len, bool last_pdu)
6612da066efSJohn Baldwin {
6622da066efSJohn Baldwin 	struct nvme_tcp_h2c_data_hdr h2c;
6632da066efSJohn Baldwin 
6642da066efSJohn Baldwin 	memset(&h2c, 0, sizeof(h2c));
6652da066efSJohn Baldwin 	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
6662da066efSJohn Baldwin 	if (last_pdu)
6672da066efSJohn Baldwin 		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
6682da066efSJohn Baldwin 	h2c.cccid = cid;
6692da066efSJohn Baldwin 	h2c.ttag = ttag;
6702da066efSJohn Baldwin 	h2c.datao = htole32(data_offset);
6712da066efSJohn Baldwin 	h2c.datal = htole32(len);
6722da066efSJohn Baldwin 
6732da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len));
6742da066efSJohn Baldwin }
6752da066efSJohn Baldwin 
6762da066efSJohn Baldwin /* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */
6772da066efSJohn Baldwin static int
6782da066efSJohn Baldwin tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
6792da066efSJohn Baldwin     uint32_t data_offset, void *buf, size_t len, bool last_pdu)
6802da066efSJohn Baldwin {
6812da066efSJohn Baldwin 	char *p;
6822da066efSJohn Baldwin 
6832da066efSJohn Baldwin 	p = buf;
6842da066efSJohn Baldwin 	while (len != 0) {
6852da066efSJohn Baldwin 		size_t todo;
6862da066efSJohn Baldwin 		int error;
6872da066efSJohn Baldwin 
6882da066efSJohn Baldwin 		todo = len;
6892da066efSJohn Baldwin 		if (todo > qp->maxh2cdata)
6902da066efSJohn Baldwin 			todo = qp->maxh2cdata;
6912da066efSJohn Baldwin 		error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo,
6922da066efSJohn Baldwin 		    last_pdu && todo == len);
6932da066efSJohn Baldwin 		if (error != 0)
6942da066efSJohn Baldwin 			return (error);
6952da066efSJohn Baldwin 		p += todo;
6962da066efSJohn Baldwin 		len -= todo;
6972da066efSJohn Baldwin 	}
6982da066efSJohn Baldwin 	return (0);
6992da066efSJohn Baldwin }
7002da066efSJohn Baldwin 
7012da066efSJohn Baldwin static int
7022da066efSJohn Baldwin nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
7032da066efSJohn Baldwin {
7042da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
7052da066efSJohn Baldwin 	struct nvme_tcp_r2t_hdr *r2t;
7062da066efSJohn Baldwin 	uint32_t data_len, data_offset;
7072da066efSJohn Baldwin 	int error;
7082da066efSJohn Baldwin 
7092da066efSJohn Baldwin 	r2t = (void *)pdu->hdr;
7102da066efSJohn Baldwin 
7112da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false);
7122da066efSJohn Baldwin 	if (cb == NULL) {
7132da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7142da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
7152da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr,
7162da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7172da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7182da066efSJohn Baldwin 		return (EBADMSG);
7192da066efSJohn Baldwin 	}
7202da066efSJohn Baldwin 
7212da066efSJohn Baldwin 	data_offset = le32toh(r2t->r2to);
7222da066efSJohn Baldwin 	if (data_offset != cb->data_xfered) {
7232da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7242da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
7252da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7262da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7272da066efSJohn Baldwin 		return (EBADMSG);
7282da066efSJohn Baldwin 	}
7292da066efSJohn Baldwin 
7302da066efSJohn Baldwin 	/*
7312da066efSJohn Baldwin 	 * XXX: The spec does not specify how to handle R2T tranfers
7322da066efSJohn Baldwin 	 * out of range of the original command.
7332da066efSJohn Baldwin 	 */
7342da066efSJohn Baldwin 	data_len = le32toh(r2t->r2tl);
7352da066efSJohn Baldwin 	if (data_offset + data_len > cb->data_len) {
7362da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7372da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
7382da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7392da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7402da066efSJohn Baldwin 		return (EBADMSG);
7412da066efSJohn Baldwin 	}
7422da066efSJohn Baldwin 
7432da066efSJohn Baldwin 	cb->data_xfered += data_len;
7442da066efSJohn Baldwin 
7452da066efSJohn Baldwin 	/*
7462da066efSJohn Baldwin 	 * Write out one or more H2C_DATA PDUs containing the
7472da066efSJohn Baldwin 	 * requested data.
7482da066efSJohn Baldwin 	 */
7492da066efSJohn Baldwin 	error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag,
7502da066efSJohn Baldwin 	    data_offset, (char *)cb->data + data_offset, data_len, true);
7512da066efSJohn Baldwin 
7522da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
7532da066efSJohn Baldwin 	return (error);
7542da066efSJohn Baldwin }
7552da066efSJohn Baldwin 
7562da066efSJohn Baldwin static int
7572da066efSJohn Baldwin nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp)
7582da066efSJohn Baldwin {
7592da066efSJohn Baldwin 	struct nvmf_tcp_rxpdu pdu;
7602da066efSJohn Baldwin 	int error;
7612da066efSJohn Baldwin 
7622da066efSJohn Baldwin 	error = nvmf_tcp_read_pdu(qp, &pdu);
7632da066efSJohn Baldwin 	if (error != 0)
7642da066efSJohn Baldwin 		return (error);
7652da066efSJohn Baldwin 
7662da066efSJohn Baldwin 	switch (pdu.hdr->pdu_type) {
7672da066efSJohn Baldwin 	default:
7682da066efSJohn Baldwin 		__unreachable();
7692da066efSJohn Baldwin 		break;
7702da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
7712da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
7722da066efSJohn Baldwin 		return (nvmf_tcp_handle_term_req(&pdu));
7732da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
7742da066efSJohn Baldwin 		return (nvmf_tcp_save_command_capsule(qp, &pdu));
7752da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
7762da066efSJohn Baldwin 		return (nvmf_tcp_save_response_capsule(qp, &pdu));
7772da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_DATA:
7782da066efSJohn Baldwin 		return (nvmf_tcp_handle_h2c_data(qp, &pdu));
7792da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_DATA:
7802da066efSJohn Baldwin 		return (nvmf_tcp_handle_c2h_data(qp, &pdu));
7812da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_R2T:
7822da066efSJohn Baldwin 		return (nvmf_tcp_handle_r2t(qp, &pdu));
7832da066efSJohn Baldwin 	}
7842da066efSJohn Baldwin }
7852da066efSJohn Baldwin 
7862da066efSJohn Baldwin static bool
7872da066efSJohn Baldwin nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
7882da066efSJohn Baldwin     const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len)
7892da066efSJohn Baldwin {
7902da066efSJohn Baldwin 	const struct nvme_tcp_ic_req *pdu;
7912da066efSJohn Baldwin 	uint32_t plen;
7922da066efSJohn Baldwin 	u_int hlen;
7932da066efSJohn Baldwin 
7942da066efSJohn Baldwin 	/* Determine how large of a PDU header to return for errors. */
7952da066efSJohn Baldwin 	hlen = ch->hlen;
7962da066efSJohn Baldwin 	plen = le32toh(ch->plen);
7972da066efSJohn Baldwin 	if (hlen < sizeof(*ch) || hlen > plen)
7982da066efSJohn Baldwin 		hlen = sizeof(*ch);
7992da066efSJohn Baldwin 
8002da066efSJohn Baldwin 	/*
8012da066efSJohn Baldwin 	 * Errors must be reported for the lowest incorrect field
8022da066efSJohn Baldwin 	 * first, so validate fields in order.
8032da066efSJohn Baldwin 	 */
8042da066efSJohn Baldwin 
8052da066efSJohn Baldwin 	/* Validate pdu_type. */
8062da066efSJohn Baldwin 
8072da066efSJohn Baldwin 	/* Controllers only receive PDUs with a PDU direction of 0. */
808a7db82cfSJohn Baldwin 	if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) {
8092da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
8102da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8112da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
8122da066efSJohn Baldwin 		    hlen);
8132da066efSJohn Baldwin 		return (false);
8142da066efSJohn Baldwin 	}
8152da066efSJohn Baldwin 
8162da066efSJohn Baldwin 	switch (ch->pdu_type) {
8172da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_IC_REQ:
8182da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_IC_RESP:
8192da066efSJohn Baldwin 		break;
8202da066efSJohn Baldwin 	default:
8212da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
8222da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8232da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
8242da066efSJohn Baldwin 		    hlen);
8252da066efSJohn Baldwin 		return (false);
8262da066efSJohn Baldwin 	}
8272da066efSJohn Baldwin 
8282da066efSJohn Baldwin 	/* Validate flags. */
8292da066efSJohn Baldwin 	if (ch->flags != 0) {
8302da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU header flags %#x",
8312da066efSJohn Baldwin 		    ch->flags);
8322da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8332da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len,
8342da066efSJohn Baldwin 		    hlen);
8352da066efSJohn Baldwin 		return (false);
8362da066efSJohn Baldwin 	}
8372da066efSJohn Baldwin 
8382da066efSJohn Baldwin 	/* Validate hlen. */
8392da066efSJohn Baldwin 	if (ch->hlen != 128) {
8402da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU header length %u",
8412da066efSJohn Baldwin 		    ch->hlen);
8422da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8432da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len,
8442da066efSJohn Baldwin 		    hlen);
8452da066efSJohn Baldwin 		return (false);
8462da066efSJohn Baldwin 	}
8472da066efSJohn Baldwin 
8482da066efSJohn Baldwin 	/* Validate pdo. */
8492da066efSJohn Baldwin 	if (ch->pdo != 0) {
8502da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo);
8512da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8522da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len,
8532da066efSJohn Baldwin 		    hlen);
8542da066efSJohn Baldwin 		return (false);
8552da066efSJohn Baldwin 	}
8562da066efSJohn Baldwin 
8572da066efSJohn Baldwin 	/* Validate plen. */
8582da066efSJohn Baldwin 	if (plen != 128) {
8592da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU length %u", plen);
8602da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8612da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len,
8622da066efSJohn Baldwin 		    hlen);
8632da066efSJohn Baldwin 		return (false);
8642da066efSJohn Baldwin 	}
8652da066efSJohn Baldwin 
8662da066efSJohn Baldwin 	/* Validate fields common to both ICReq and ICResp. */
8672da066efSJohn Baldwin 	pdu = (const struct nvme_tcp_ic_req *)ch;
8682da066efSJohn Baldwin 	if (le16toh(pdu->pfv) != 0) {
8692da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Unsupported PDU version %u",
8702da066efSJohn Baldwin 		    le16toh(pdu->pfv));
8712da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8722da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
8732da066efSJohn Baldwin 		    8, ch, pdu_len, hlen);
8742da066efSJohn Baldwin 		return (false);
8752da066efSJohn Baldwin 	}
8762da066efSJohn Baldwin 
8772da066efSJohn Baldwin 	if (pdu->hpda > NVME_TCP_HPDA_MAX) {
8782da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda);
8792da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8802da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len,
8812da066efSJohn Baldwin 		    hlen);
8822da066efSJohn Baldwin 		return (false);
8832da066efSJohn Baldwin 	}
8842da066efSJohn Baldwin 
8852da066efSJohn Baldwin 	if (pdu->dgst.bits.reserved != 0) {
8862da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid digest settings");
8872da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8882da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len,
8892da066efSJohn Baldwin 		    hlen);
8902da066efSJohn Baldwin 		return (false);
8912da066efSJohn Baldwin 	}
8922da066efSJohn Baldwin 
8932da066efSJohn Baldwin 	return (true);
8942da066efSJohn Baldwin }
8952da066efSJohn Baldwin 
8962da066efSJohn Baldwin static bool
8972da066efSJohn Baldwin nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
8982da066efSJohn Baldwin     struct nvme_tcp_ic_req *pdu)
8992da066efSJohn Baldwin {
9002da066efSJohn Baldwin 	int error;
9012da066efSJohn Baldwin 
9022da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
9032da066efSJohn Baldwin 	if (error != 0) {
9042da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Failed to read IC request: %s",
9052da066efSJohn Baldwin 		    strerror(error));
9062da066efSJohn Baldwin 		return (false);
9072da066efSJohn Baldwin 	}
9082da066efSJohn Baldwin 
9092da066efSJohn Baldwin 	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
9102da066efSJohn Baldwin }
9112da066efSJohn Baldwin 
9122da066efSJohn Baldwin static bool
9132da066efSJohn Baldwin nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
9142da066efSJohn Baldwin     struct nvme_tcp_ic_resp *pdu)
9152da066efSJohn Baldwin {
9162da066efSJohn Baldwin 	int error;
9172da066efSJohn Baldwin 
9182da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
9192da066efSJohn Baldwin 	if (error != 0) {
9202da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Failed to read IC response: %s",
9212da066efSJohn Baldwin 		    strerror(error));
9222da066efSJohn Baldwin 		return (false);
9232da066efSJohn Baldwin 	}
9242da066efSJohn Baldwin 
9252da066efSJohn Baldwin 	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
9262da066efSJohn Baldwin }
9272da066efSJohn Baldwin 
9282da066efSJohn Baldwin static struct nvmf_association *
929846d702fSJohn Baldwin tcp_allocate_association(bool controller,
930846d702fSJohn Baldwin     const struct nvmf_association_params *params)
9312da066efSJohn Baldwin {
9322da066efSJohn Baldwin 	struct nvmf_tcp_association *ta;
9332da066efSJohn Baldwin 
934846d702fSJohn Baldwin 	if (controller) {
935846d702fSJohn Baldwin 		/* 7.4.10.3 */
936846d702fSJohn Baldwin 		if (params->tcp.maxh2cdata < 4096 ||
937846d702fSJohn Baldwin 		    params->tcp.maxh2cdata % 4 != 0)
938846d702fSJohn Baldwin 			return (NULL);
939846d702fSJohn Baldwin 	}
940846d702fSJohn Baldwin 
9412da066efSJohn Baldwin 	ta = calloc(1, sizeof(*ta));
9422da066efSJohn Baldwin 
9432da066efSJohn Baldwin 	return (&ta->na);
9442da066efSJohn Baldwin }
9452da066efSJohn Baldwin 
9462da066efSJohn Baldwin static void
9472da066efSJohn Baldwin tcp_update_association(struct nvmf_association *na,
9482da066efSJohn Baldwin     const struct nvme_controller_data *cdata)
9492da066efSJohn Baldwin {
9502da066efSJohn Baldwin 	struct nvmf_tcp_association *ta = TASSOC(na);
9512da066efSJohn Baldwin 
9522da066efSJohn Baldwin 	ta->ioccsz = le32toh(cdata->ioccsz);
9532da066efSJohn Baldwin }
9542da066efSJohn Baldwin 
/*
 * Release an association allocated by tcp_allocate_association().
 *
 * The generic association is the first member of
 * struct nvmf_tcp_association, so its address is also the address of
 * the allocation and can be passed to free() directly.
 */
static void
tcp_free_association(struct nvmf_association *na)
{
	free(na);
}
9602da066efSJohn Baldwin 
9612da066efSJohn Baldwin static bool
9622da066efSJohn Baldwin tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin)
9632da066efSJohn Baldwin {
9642da066efSJohn Baldwin 	const struct nvmf_association_params *params = &na->na_params;
9652da066efSJohn Baldwin 	struct nvmf_tcp_association *ta = TASSOC(na);
9662da066efSJohn Baldwin 	struct nvme_tcp_ic_req ic_req;
9672da066efSJohn Baldwin 	struct nvme_tcp_ic_resp ic_resp;
968fd0e6af5SJohn Baldwin 	uint32_t maxh2cdata;
9692da066efSJohn Baldwin 	int error;
9702da066efSJohn Baldwin 
9712da066efSJohn Baldwin 	if (!admin) {
9722da066efSJohn Baldwin 		if (ta->ioccsz == 0) {
9732da066efSJohn Baldwin 			na_error(na, "TCP I/O queues require cdata");
9742da066efSJohn Baldwin 			return (false);
9752da066efSJohn Baldwin 		}
9762da066efSJohn Baldwin 		if (ta->ioccsz < 4) {
9772da066efSJohn Baldwin 			na_error(na, "Invalid IOCCSZ %u", ta->ioccsz);
9782da066efSJohn Baldwin 			return (false);
9792da066efSJohn Baldwin 		}
9802da066efSJohn Baldwin 	}
9812da066efSJohn Baldwin 
9822da066efSJohn Baldwin 	memset(&ic_req, 0, sizeof(ic_req));
9832da066efSJohn Baldwin 	ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ;
9842da066efSJohn Baldwin 	ic_req.common.hlen = sizeof(ic_req);
9852da066efSJohn Baldwin 	ic_req.common.plen = htole32(sizeof(ic_req));
9862da066efSJohn Baldwin 	ic_req.pfv = htole16(0);
9872da066efSJohn Baldwin 	ic_req.hpda = params->tcp.pda;
9882da066efSJohn Baldwin 	if (params->tcp.header_digests)
9892da066efSJohn Baldwin 		ic_req.dgst.bits.hdgst_enable = 1;
9902da066efSJohn Baldwin 	if (params->tcp.data_digests)
9912da066efSJohn Baldwin 		ic_req.dgst.bits.ddgst_enable = 1;
9922da066efSJohn Baldwin 	ic_req.maxr2t = htole32(params->tcp.maxr2t);
9932da066efSJohn Baldwin 
9942da066efSJohn Baldwin 	error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req));
9952da066efSJohn Baldwin 	if (error != 0) {
9962da066efSJohn Baldwin 		na_error(na, "Failed to write IC request: %s", strerror(error));
9972da066efSJohn Baldwin 		return (false);
9982da066efSJohn Baldwin 	}
9992da066efSJohn Baldwin 
10002da066efSJohn Baldwin 	if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp))
10012da066efSJohn Baldwin 		return (false);
10022da066efSJohn Baldwin 
10032da066efSJohn Baldwin 	/* Ensure the controller didn't enable digests we didn't request. */
10042da066efSJohn Baldwin 	if ((!params->tcp.header_digests &&
10052da066efSJohn Baldwin 	    ic_resp.dgst.bits.hdgst_enable != 0) ||
10062da066efSJohn Baldwin 	    (!params->tcp.data_digests &&
10072da066efSJohn Baldwin 	    ic_resp.dgst.bits.ddgst_enable != 0)) {
10082da066efSJohn Baldwin 		na_error(na, "Controller enabled unrequested digests");
10092da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
10102da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
10112da066efSJohn Baldwin 		    11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp));
10122da066efSJohn Baldwin 		return (false);
10132da066efSJohn Baldwin 	}
10142da066efSJohn Baldwin 
10152da066efSJohn Baldwin 	/*
10162da066efSJohn Baldwin 	 * XXX: Is there an upper-bound to enforce here?  Perhaps pick
10172da066efSJohn Baldwin 	 * some large value and report larger values as an unsupported
10182da066efSJohn Baldwin 	 * parameter?
10192da066efSJohn Baldwin 	 */
1020fd0e6af5SJohn Baldwin 	maxh2cdata = le32toh(ic_resp.maxh2cdata);
1021fd0e6af5SJohn Baldwin 	if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) {
1022fd0e6af5SJohn Baldwin 		na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata);
10232da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
10242da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp,
10252da066efSJohn Baldwin 		    sizeof(ic_resp), sizeof(ic_resp));
10262da066efSJohn Baldwin 		return (false);
10272da066efSJohn Baldwin 	}
10282da066efSJohn Baldwin 
10297b8dd078SJohn Baldwin 	qp->rxpda = (params->tcp.pda + 1) * 4;
10307b8dd078SJohn Baldwin 	qp->txpda = (ic_resp.cpda + 1) * 4;
10312da066efSJohn Baldwin 	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
10322da066efSJohn Baldwin 	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
10332da066efSJohn Baldwin 	qp->maxr2t = params->tcp.maxr2t;
1034fd0e6af5SJohn Baldwin 	qp->maxh2cdata = maxh2cdata;
10352da066efSJohn Baldwin 	if (admin)
10362da066efSJohn Baldwin 		/* 7.4.3 */
10372da066efSJohn Baldwin 		qp->max_icd = 8192;
10382da066efSJohn Baldwin 	else
10392da066efSJohn Baldwin 		qp->max_icd = (ta->ioccsz - 4) * 16;
10402da066efSJohn Baldwin 
10412da066efSJohn Baldwin 	return (0);
10422da066efSJohn Baldwin }
10432da066efSJohn Baldwin 
10442da066efSJohn Baldwin static bool
10452da066efSJohn Baldwin tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na)
10462da066efSJohn Baldwin {
10472da066efSJohn Baldwin 	const struct nvmf_association_params *params = &na->na_params;
10482da066efSJohn Baldwin 	struct nvme_tcp_ic_req ic_req;
10492da066efSJohn Baldwin 	struct nvme_tcp_ic_resp ic_resp;
10502da066efSJohn Baldwin 	int error;
10512da066efSJohn Baldwin 
10522da066efSJohn Baldwin 	if (!nvmf_tcp_read_ic_req(na, qp, &ic_req))
10532da066efSJohn Baldwin 		return (false);
10542da066efSJohn Baldwin 
10552da066efSJohn Baldwin 	memset(&ic_resp, 0, sizeof(ic_resp));
10562da066efSJohn Baldwin 	ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP;
10572da066efSJohn Baldwin 	ic_resp.common.hlen = sizeof(ic_req);
10582da066efSJohn Baldwin 	ic_resp.common.plen = htole32(sizeof(ic_req));
10592da066efSJohn Baldwin 	ic_resp.pfv = htole16(0);
10602da066efSJohn Baldwin 	ic_resp.cpda = params->tcp.pda;
10612da066efSJohn Baldwin 	if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0)
10622da066efSJohn Baldwin 		ic_resp.dgst.bits.hdgst_enable = 1;
10632da066efSJohn Baldwin 	if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0)
10642da066efSJohn Baldwin 		ic_resp.dgst.bits.ddgst_enable = 1;
10652da066efSJohn Baldwin 	ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata);
10662da066efSJohn Baldwin 
10672da066efSJohn Baldwin 	error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp));
10682da066efSJohn Baldwin 	if (error != 0) {
10692da066efSJohn Baldwin 		na_error(na, "Failed to write IC response: %s",
10702da066efSJohn Baldwin 		    strerror(error));
10712da066efSJohn Baldwin 		return (false);
10722da066efSJohn Baldwin 	}
10732da066efSJohn Baldwin 
10747b8dd078SJohn Baldwin 	qp->rxpda = (params->tcp.pda + 1) * 4;
10757b8dd078SJohn Baldwin 	qp->txpda = (ic_req.hpda + 1) * 4;
10762da066efSJohn Baldwin 	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
10772da066efSJohn Baldwin 	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
10782da066efSJohn Baldwin 	qp->maxr2t = le32toh(ic_req.maxr2t);
10792da066efSJohn Baldwin 	qp->maxh2cdata = params->tcp.maxh2cdata;
10802da066efSJohn Baldwin 	qp->max_icd = 0;	/* XXX */
10812da066efSJohn Baldwin 	return (0);
10822da066efSJohn Baldwin }
10832da066efSJohn Baldwin 
10842da066efSJohn Baldwin static struct nvmf_qpair *
10852da066efSJohn Baldwin tcp_allocate_qpair(struct nvmf_association *na,
10862da066efSJohn Baldwin     const struct nvmf_qpair_params *qparams)
10872da066efSJohn Baldwin {
10882da066efSJohn Baldwin 	const struct nvmf_association_params *aparams = &na->na_params;
10892da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp;
10902da066efSJohn Baldwin 	int error;
10912da066efSJohn Baldwin 
10922da066efSJohn Baldwin 	if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) {
10932da066efSJohn Baldwin 		na_error(na, "Invalid PDA");
10942da066efSJohn Baldwin 		return (NULL);
10952da066efSJohn Baldwin 	}
10962da066efSJohn Baldwin 
10972da066efSJohn Baldwin 	qp = calloc(1, sizeof(*qp));
10982da066efSJohn Baldwin 	qp->s = qparams->tcp.fd;
10992da066efSJohn Baldwin 	LIST_INIT(&qp->rx_buffers);
11002da066efSJohn Baldwin 	LIST_INIT(&qp->tx_buffers);
11012da066efSJohn Baldwin 	TAILQ_INIT(&qp->rx_capsules);
11022da066efSJohn Baldwin 	if (na->na_controller)
11032da066efSJohn Baldwin 		error = tcp_accept(qp, na);
11042da066efSJohn Baldwin 	else
11052da066efSJohn Baldwin 		error = tcp_connect(qp, na, qparams->admin);
11062da066efSJohn Baldwin 	if (error != 0) {
11072da066efSJohn Baldwin 		free(qp);
11082da066efSJohn Baldwin 		return (NULL);
11092da066efSJohn Baldwin 	}
11102da066efSJohn Baldwin 
11112da066efSJohn Baldwin 	return (&qp->qp);
11122da066efSJohn Baldwin }
11132da066efSJohn Baldwin 
11142da066efSJohn Baldwin static void
11152da066efSJohn Baldwin tcp_free_qpair(struct nvmf_qpair *nq)
11162da066efSJohn Baldwin {
11172da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
11182da066efSJohn Baldwin 	struct nvmf_tcp_capsule *ntc, *tc;
11192da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *ncb, *cb;
11202da066efSJohn Baldwin 
11212da066efSJohn Baldwin 	TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) {
11222da066efSJohn Baldwin 		TAILQ_REMOVE(&qp->rx_capsules, tc, link);
11232da066efSJohn Baldwin 		nvmf_free_capsule(&tc->nc);
11242da066efSJohn Baldwin 	}
11252da066efSJohn Baldwin 	LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) {
11262da066efSJohn Baldwin 		tcp_free_command_buffer(cb);
11272da066efSJohn Baldwin 	}
11282da066efSJohn Baldwin 	LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) {
11292da066efSJohn Baldwin 		tcp_free_command_buffer(cb);
11302da066efSJohn Baldwin 	}
11312da066efSJohn Baldwin 	free(qp);
11322da066efSJohn Baldwin }
11332da066efSJohn Baldwin 
1134365b89e8SJohn Baldwin static void
1135365b89e8SJohn Baldwin tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl)
11362da066efSJohn Baldwin {
11372da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
11382da066efSJohn Baldwin 
1139365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "fd", qp->s);
1140365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "rxpda", qp->rxpda);
1141365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "txpda", qp->txpda);
1142365b89e8SJohn Baldwin 	nvlist_add_bool(nvl, "header_digests", qp->header_digests);
1143365b89e8SJohn Baldwin 	nvlist_add_bool(nvl, "data_digests", qp->data_digests);
1144365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "maxr2t", qp->maxr2t);
1145365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata);
1146365b89e8SJohn Baldwin 	nvlist_add_number(nvl, "max_icd", qp->max_icd);
11472da066efSJohn Baldwin }
11482da066efSJohn Baldwin 
1149*8bba2c0fSJohn Baldwin static int
1150*8bba2c0fSJohn Baldwin tcp_populate_dle(struct nvmf_qpair *nq, struct nvme_discovery_log_entry *dle)
1151*8bba2c0fSJohn Baldwin {
1152*8bba2c0fSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
1153*8bba2c0fSJohn Baldwin 	struct sockaddr_storage ss;
1154*8bba2c0fSJohn Baldwin 	socklen_t ss_len;
1155*8bba2c0fSJohn Baldwin 
1156*8bba2c0fSJohn Baldwin 	ss_len = sizeof(ss);
1157*8bba2c0fSJohn Baldwin 	if (getpeername(qp->s, (struct sockaddr *)&ss, &ss_len) == -1)
1158*8bba2c0fSJohn Baldwin 		return (errno);
1159*8bba2c0fSJohn Baldwin 
1160*8bba2c0fSJohn Baldwin 	if (getnameinfo((struct sockaddr *)&ss, ss_len, dle->traddr,
1161*8bba2c0fSJohn Baldwin 	    sizeof(dle->traddr), dle->trsvcid, sizeof(dle->trsvcid),
1162*8bba2c0fSJohn Baldwin 	    NI_NUMERICHOST | NI_NUMERICSERV) != 0)
1163*8bba2c0fSJohn Baldwin 		return (EINVAL);
1164*8bba2c0fSJohn Baldwin 
1165*8bba2c0fSJohn Baldwin 	return (0);
1166*8bba2c0fSJohn Baldwin }
1167*8bba2c0fSJohn Baldwin 
11682da066efSJohn Baldwin static struct nvmf_capsule *
11692da066efSJohn Baldwin tcp_allocate_capsule(struct nvmf_qpair *qp __unused)
11702da066efSJohn Baldwin {
11712da066efSJohn Baldwin 	struct nvmf_tcp_capsule *nc;
11722da066efSJohn Baldwin 
11732da066efSJohn Baldwin 	nc = calloc(1, sizeof(*nc));
11742da066efSJohn Baldwin 	return (&nc->nc);
11752da066efSJohn Baldwin }
11762da066efSJohn Baldwin 
11772da066efSJohn Baldwin static void
11782da066efSJohn Baldwin tcp_free_capsule(struct nvmf_capsule *nc)
11792da066efSJohn Baldwin {
11802da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc = TCAP(nc);
11812da066efSJohn Baldwin 
11822da066efSJohn Baldwin 	nvmf_tcp_free_pdu(&tc->rx_pdu);
11832da066efSJohn Baldwin 	if (tc->cb != NULL)
11842da066efSJohn Baldwin 		tcp_free_command_buffer(tc->cb);
11852da066efSJohn Baldwin 	free(tc);
11862da066efSJohn Baldwin }
11872da066efSJohn Baldwin 
11882da066efSJohn Baldwin static int
11892da066efSJohn Baldwin tcp_transmit_command(struct nvmf_capsule *nc)
11902da066efSJohn Baldwin {
11912da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
11922da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc = TCAP(nc);
11932da066efSJohn Baldwin 	struct nvme_tcp_cmd cmd;
11942da066efSJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
11952da066efSJohn Baldwin 	int error;
11962da066efSJohn Baldwin 	bool use_icd;
11972da066efSJohn Baldwin 
11982da066efSJohn Baldwin 	use_icd = false;
11992da066efSJohn Baldwin 	if (nc->nc_data_len != 0 && nc->nc_send_data &&
12002da066efSJohn Baldwin 	    nc->nc_data_len <= qp->max_icd)
12012da066efSJohn Baldwin 		use_icd = true;
12022da066efSJohn Baldwin 
12032da066efSJohn Baldwin 	memset(&cmd, 0, sizeof(cmd));
12042da066efSJohn Baldwin 	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
12052da066efSJohn Baldwin 	cmd.ccsqe = nc->nc_sqe;
12062da066efSJohn Baldwin 
12072da066efSJohn Baldwin 	/* Populate SGL in SQE. */
12082da066efSJohn Baldwin 	sgl = &cmd.ccsqe.sgl;
12092da066efSJohn Baldwin 	memset(sgl, 0, sizeof(*sgl));
12102da066efSJohn Baldwin 	sgl->address = 0;
12112da066efSJohn Baldwin 	sgl->length = htole32(nc->nc_data_len);
12122da066efSJohn Baldwin 	if (use_icd) {
12132da066efSJohn Baldwin 		/* Use in-capsule data. */
12142da066efSJohn Baldwin 		sgl->type = NVME_SGL_TYPE_ICD;
12152da066efSJohn Baldwin 	} else {
12162da066efSJohn Baldwin 		/* Use a command buffer. */
12172da066efSJohn Baldwin 		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
12182da066efSJohn Baldwin 	}
12192da066efSJohn Baldwin 
12202da066efSJohn Baldwin 	/* Send command capsule. */
12212da066efSJohn Baldwin 	error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ?
12222da066efSJohn Baldwin 	    nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0);
12232da066efSJohn Baldwin 	if (error != 0)
12242da066efSJohn Baldwin 		return (error);
12252da066efSJohn Baldwin 
12262da066efSJohn Baldwin 	/*
12272da066efSJohn Baldwin 	 * If data will be transferred using a command buffer, allocate a
12282da066efSJohn Baldwin 	 * buffer structure and queue it.
12292da066efSJohn Baldwin 	 */
12302da066efSJohn Baldwin 	if (nc->nc_data_len != 0 && !use_icd)
12312da066efSJohn Baldwin 		tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0,
12322da066efSJohn Baldwin 		    nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data);
12332da066efSJohn Baldwin 
12342da066efSJohn Baldwin 	return (0);
12352da066efSJohn Baldwin }
12362da066efSJohn Baldwin 
12372da066efSJohn Baldwin static int
12382da066efSJohn Baldwin tcp_transmit_response(struct nvmf_capsule *nc)
12392da066efSJohn Baldwin {
12402da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
12412da066efSJohn Baldwin 	struct nvme_tcp_rsp rsp;
12422da066efSJohn Baldwin 
12432da066efSJohn Baldwin 	memset(&rsp, 0, sizeof(rsp));
12442da066efSJohn Baldwin 	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
12452da066efSJohn Baldwin 	rsp.rccqe = nc->nc_cqe;
12462da066efSJohn Baldwin 
12472da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
12482da066efSJohn Baldwin }
12492da066efSJohn Baldwin 
12502da066efSJohn Baldwin static int
12512da066efSJohn Baldwin tcp_transmit_capsule(struct nvmf_capsule *nc)
12522da066efSJohn Baldwin {
12532da066efSJohn Baldwin 	if (nc->nc_qe_len == sizeof(struct nvme_command))
12542da066efSJohn Baldwin 		return (tcp_transmit_command(nc));
12552da066efSJohn Baldwin 	else
12562da066efSJohn Baldwin 		return (tcp_transmit_response(nc));
12572da066efSJohn Baldwin }
12582da066efSJohn Baldwin 
12592da066efSJohn Baldwin static int
12602da066efSJohn Baldwin tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp)
12612da066efSJohn Baldwin {
12622da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
12632da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
12642da066efSJohn Baldwin 	int error;
12652da066efSJohn Baldwin 
12662da066efSJohn Baldwin 	while (TAILQ_EMPTY(&qp->rx_capsules)) {
12672da066efSJohn Baldwin 		error = nvmf_tcp_receive_pdu(qp);
12682da066efSJohn Baldwin 		if (error != 0)
12692da066efSJohn Baldwin 			return (error);
12702da066efSJohn Baldwin 	}
12712da066efSJohn Baldwin 	tc = TAILQ_FIRST(&qp->rx_capsules);
12722da066efSJohn Baldwin 	TAILQ_REMOVE(&qp->rx_capsules, tc, link);
12732da066efSJohn Baldwin 	*ncp = &tc->nc;
12742da066efSJohn Baldwin 	return (0);
12752da066efSJohn Baldwin }
12762da066efSJohn Baldwin 
12772da066efSJohn Baldwin static uint8_t
12782da066efSJohn Baldwin tcp_validate_command_capsule(const struct nvmf_capsule *nc)
12792da066efSJohn Baldwin {
12802da066efSJohn Baldwin 	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
12812da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
12822da066efSJohn Baldwin 
12832da066efSJohn Baldwin 	assert(tc->rx_pdu.hdr != NULL);
12842da066efSJohn Baldwin 
12852da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
12862da066efSJohn Baldwin 	switch (sgl->type) {
12872da066efSJohn Baldwin 	case NVME_SGL_TYPE_ICD:
12882da066efSJohn Baldwin 		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
12892da066efSJohn Baldwin 			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
12902da066efSJohn Baldwin 			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
12912da066efSJohn Baldwin 		}
12922da066efSJohn Baldwin 		break;
12932da066efSJohn Baldwin 	case NVME_SGL_TYPE_COMMAND_BUFFER:
12942da066efSJohn Baldwin 		if (tc->rx_pdu.data_len != 0) {
12952da066efSJohn Baldwin 			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
12962da066efSJohn Baldwin 			return (NVME_SC_INVALID_FIELD);
12972da066efSJohn Baldwin 		}
12982da066efSJohn Baldwin 		break;
12992da066efSJohn Baldwin 	default:
13002da066efSJohn Baldwin 		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
13012da066efSJohn Baldwin 		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
13022da066efSJohn Baldwin 	}
13032da066efSJohn Baldwin 
13042da066efSJohn Baldwin 	if (sgl->address != 0) {
13052da066efSJohn Baldwin 		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
13062da066efSJohn Baldwin 		return (NVME_SC_SGL_OFFSET_INVALID);
13072da066efSJohn Baldwin 	}
13082da066efSJohn Baldwin 
13092da066efSJohn Baldwin 	return (NVME_SC_SUCCESS);
13102da066efSJohn Baldwin }
13112da066efSJohn Baldwin 
13122da066efSJohn Baldwin static size_t
13132da066efSJohn Baldwin tcp_capsule_data_len(const struct nvmf_capsule *nc)
13142da066efSJohn Baldwin {
13152da066efSJohn Baldwin 	assert(nc->nc_qe_len == sizeof(struct nvme_command));
13162da066efSJohn Baldwin 	return (le32toh(nc->nc_sqe.sgl.length));
13172da066efSJohn Baldwin }
13182da066efSJohn Baldwin 
13192da066efSJohn Baldwin /* NB: cid and ttag are both little-endian already. */
13202da066efSJohn Baldwin static int
13212da066efSJohn Baldwin tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
13222da066efSJohn Baldwin     uint32_t data_offset, uint32_t data_len)
13232da066efSJohn Baldwin {
13242da066efSJohn Baldwin 	struct nvme_tcp_r2t_hdr r2t;
13252da066efSJohn Baldwin 
13262da066efSJohn Baldwin 	memset(&r2t, 0, sizeof(r2t));
13272da066efSJohn Baldwin 	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
13282da066efSJohn Baldwin 	r2t.cccid = cid;
13292da066efSJohn Baldwin 	r2t.ttag = ttag;
13302da066efSJohn Baldwin 	r2t.r2to = htole32(data_offset);
13312da066efSJohn Baldwin 	r2t.r2tl = htole32(data_len);
13322da066efSJohn Baldwin 
13332da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
13342da066efSJohn Baldwin }
13352da066efSJohn Baldwin 
13362da066efSJohn Baldwin static int
13372da066efSJohn Baldwin tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13382da066efSJohn Baldwin     void *buf, size_t len)
13392da066efSJohn Baldwin {
13402da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
13412da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
13422da066efSJohn Baldwin 	int error;
13432da066efSJohn Baldwin 	uint16_t ttag;
13442da066efSJohn Baldwin 
13452da066efSJohn Baldwin 	/*
13462da066efSJohn Baldwin 	 * Don't bother byte-swapping ttag as it is just a cookie
13472da066efSJohn Baldwin 	 * value returned by the other end as-is.
13482da066efSJohn Baldwin 	 */
13492da066efSJohn Baldwin 	ttag = qp->next_ttag++;
13502da066efSJohn Baldwin 
13512da066efSJohn Baldwin 	error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
13522da066efSJohn Baldwin 	if (error != 0)
13532da066efSJohn Baldwin 		return (error);
13542da066efSJohn Baldwin 
13552da066efSJohn Baldwin 	cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
13562da066efSJohn Baldwin 	    nc->nc_sqe.cid, ttag, true);
13572da066efSJohn Baldwin 
13582da066efSJohn Baldwin 	/* Parse received PDUs until the data transfer is complete. */
13592da066efSJohn Baldwin 	while (cb->data_xfered < cb->data_len) {
13602da066efSJohn Baldwin 		error = nvmf_tcp_receive_pdu(qp);
13612da066efSJohn Baldwin 		if (error != 0)
13622da066efSJohn Baldwin 			break;
13632da066efSJohn Baldwin 	}
13642da066efSJohn Baldwin 	tcp_free_command_buffer(cb);
13652da066efSJohn Baldwin 	return (error);
13662da066efSJohn Baldwin }
13672da066efSJohn Baldwin 
13682da066efSJohn Baldwin static int
13692da066efSJohn Baldwin tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13702da066efSJohn Baldwin     void *buf, size_t len)
13712da066efSJohn Baldwin {
13722da066efSJohn Baldwin 	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
13732da066efSJohn Baldwin 	const char *icd;
13742da066efSJohn Baldwin 
13752da066efSJohn Baldwin 	icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset;
13762da066efSJohn Baldwin 	memcpy(buf, icd, len);
13772da066efSJohn Baldwin 	return (0);
13782da066efSJohn Baldwin }
13792da066efSJohn Baldwin 
13802da066efSJohn Baldwin static int
13812da066efSJohn Baldwin tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13822da066efSJohn Baldwin     void *buf, size_t len)
13832da066efSJohn Baldwin {
13842da066efSJohn Baldwin 	struct nvmf_association *na = nc->nc_qpair->nq_association;
13852da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
13862da066efSJohn Baldwin 	size_t data_len;
13872da066efSJohn Baldwin 
13882da066efSJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
13892da066efSJohn Baldwin 		return (EINVAL);
13902da066efSJohn Baldwin 
13912da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
13922da066efSJohn Baldwin 	data_len = le32toh(sgl->length);
13932da066efSJohn Baldwin 	if (data_offset + len > data_len)
13942da066efSJohn Baldwin 		return (EFBIG);
13952da066efSJohn Baldwin 
13962da066efSJohn Baldwin 	if (sgl->type == NVME_SGL_TYPE_ICD)
13972da066efSJohn Baldwin 		return (tcp_receive_icd_data(nc, data_offset, buf, len));
13982da066efSJohn Baldwin 	else
13992da066efSJohn Baldwin 		return (tcp_receive_r2t_data(nc, data_offset, buf, len));
14002da066efSJohn Baldwin }
14012da066efSJohn Baldwin 
14022da066efSJohn Baldwin /* NB: cid is little-endian already. */
14032da066efSJohn Baldwin static int
14042da066efSJohn Baldwin tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
14052da066efSJohn Baldwin     uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
14062da066efSJohn Baldwin     bool success)
14072da066efSJohn Baldwin {
14082da066efSJohn Baldwin 	struct nvme_tcp_c2h_data_hdr c2h;
14092da066efSJohn Baldwin 
14102da066efSJohn Baldwin 	memset(&c2h, 0, sizeof(c2h));
14112da066efSJohn Baldwin 	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
14122da066efSJohn Baldwin 	if (last_pdu)
14132da066efSJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
14142da066efSJohn Baldwin 	if (success)
14152da066efSJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
14162da066efSJohn Baldwin 	c2h.cccid = cid;
14172da066efSJohn Baldwin 	c2h.datao = htole32(data_offset);
14182da066efSJohn Baldwin 	c2h.datal = htole32(len);
14192da066efSJohn Baldwin 
14202da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
14212da066efSJohn Baldwin 	    __DECONST(void *, buf), len));
14222da066efSJohn Baldwin }
14232da066efSJohn Baldwin 
14242da066efSJohn Baldwin static int
14252da066efSJohn Baldwin tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
14262da066efSJohn Baldwin     size_t len)
14272da066efSJohn Baldwin {
14282da066efSJohn Baldwin 	struct nvmf_association *na = nc->nc_qpair->nq_association;
14292da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
14302da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
14312da066efSJohn Baldwin 	const char *src;
14322da066efSJohn Baldwin 	size_t todo;
14332da066efSJohn Baldwin 	uint32_t data_len, data_offset;
14342da066efSJohn Baldwin 	int error;
14352da066efSJohn Baldwin 	bool last_pdu, send_success_flag;
14362da066efSJohn Baldwin 
14372da066efSJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
14382da066efSJohn Baldwin 		return (EINVAL);
14392da066efSJohn Baldwin 
14402da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
14412da066efSJohn Baldwin 	data_len = le32toh(sgl->length);
14422da066efSJohn Baldwin 	if (len != data_len) {
14432da066efSJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
14442da066efSJohn Baldwin 		return (EFBIG);
14452da066efSJohn Baldwin 	}
14462da066efSJohn Baldwin 
14472da066efSJohn Baldwin 	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
14482da066efSJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
14492da066efSJohn Baldwin 		return (EINVAL);
14502da066efSJohn Baldwin 	}
14512da066efSJohn Baldwin 
14522da066efSJohn Baldwin 	/* Use the SUCCESS flag if SQ flow control is disabled. */
14532da066efSJohn Baldwin 	send_success_flag = !qp->qp.nq_flow_control;
14542da066efSJohn Baldwin 
14552da066efSJohn Baldwin 	/*
14562da066efSJohn Baldwin 	 * Write out one or more C2H_DATA PDUs containing the data.
14572da066efSJohn Baldwin 	 * Each PDU is arbitrarily capped at 256k.
14582da066efSJohn Baldwin 	 */
14592da066efSJohn Baldwin 	data_offset = 0;
14602da066efSJohn Baldwin 	src = buf;
14612da066efSJohn Baldwin 	while (len > 0) {
14622da066efSJohn Baldwin 		if (len > 256 * 1024) {
14632da066efSJohn Baldwin 			todo = 256 * 1024;
14642da066efSJohn Baldwin 			last_pdu = false;
14652da066efSJohn Baldwin 		} else {
14662da066efSJohn Baldwin 			todo = len;
14672da066efSJohn Baldwin 			last_pdu = true;
14682da066efSJohn Baldwin 		}
14692da066efSJohn Baldwin 		error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
14702da066efSJohn Baldwin 		    src, todo, last_pdu, last_pdu && send_success_flag);
14712da066efSJohn Baldwin 		if (error != 0) {
14722da066efSJohn Baldwin 			nvmf_send_generic_error(nc,
14732da066efSJohn Baldwin 			    NVME_SC_TRANSIENT_TRANSPORT_ERROR);
14742da066efSJohn Baldwin 			return (error);
14752da066efSJohn Baldwin 		}
14762da066efSJohn Baldwin 		data_offset += todo;
14772da066efSJohn Baldwin 		src += todo;
14782da066efSJohn Baldwin 		len -= todo;
14792da066efSJohn Baldwin 	}
14802da066efSJohn Baldwin 	if (!send_success_flag)
14812da066efSJohn Baldwin 		nvmf_send_success(nc);
14822da066efSJohn Baldwin 	return (0);
14832da066efSJohn Baldwin }
14842da066efSJohn Baldwin 
14852da066efSJohn Baldwin struct nvmf_transport_ops tcp_ops = {
14862da066efSJohn Baldwin 	.allocate_association = tcp_allocate_association,
14872da066efSJohn Baldwin 	.update_association = tcp_update_association,
14882da066efSJohn Baldwin 	.free_association = tcp_free_association,
14892da066efSJohn Baldwin 	.allocate_qpair = tcp_allocate_qpair,
14902da066efSJohn Baldwin 	.free_qpair = tcp_free_qpair,
14912da066efSJohn Baldwin 	.kernel_handoff_params = tcp_kernel_handoff_params,
1492*8bba2c0fSJohn Baldwin 	.populate_dle = tcp_populate_dle,
14932da066efSJohn Baldwin 	.allocate_capsule = tcp_allocate_capsule,
14942da066efSJohn Baldwin 	.free_capsule = tcp_free_capsule,
14952da066efSJohn Baldwin 	.transmit_capsule = tcp_transmit_capsule,
14962da066efSJohn Baldwin 	.receive_capsule = tcp_receive_capsule,
14972da066efSJohn Baldwin 	.validate_command_capsule = tcp_validate_command_capsule,
14982da066efSJohn Baldwin 	.capsule_data_len = tcp_capsule_data_len,
14992da066efSJohn Baldwin 	.receive_controller_data = tcp_receive_controller_data,
15002da066efSJohn Baldwin 	.send_controller_data = tcp_send_controller_data,
15012da066efSJohn Baldwin };
1502