xref: /onnv-gate/usr/src/uts/common/io/ib/clients/rdsv3/message.c (revision 12763:4b30642bc04e)
112198SEiji.Ota@Sun.COM /*
212198SEiji.Ota@Sun.COM  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
312198SEiji.Ota@Sun.COM  */
412198SEiji.Ota@Sun.COM 
512198SEiji.Ota@Sun.COM /*
6*12763SGiri.Adari@Sun.COM  * This file contains code imported from the OFED rds source file message.c
7*12763SGiri.Adari@Sun.COM  * Oracle elects to have and use the contents of message.c under and governed
8*12763SGiri.Adari@Sun.COM  * by the OpenIB.org BSD license (see below for full license text). However,
9*12763SGiri.Adari@Sun.COM  * the following notice accompanied the original version of this file:
10*12763SGiri.Adari@Sun.COM  */
11*12763SGiri.Adari@Sun.COM 
12*12763SGiri.Adari@Sun.COM /*
1312198SEiji.Ota@Sun.COM  * Copyright (c) 2006 Oracle.  All rights reserved.
1412198SEiji.Ota@Sun.COM  *
1512198SEiji.Ota@Sun.COM  * This software is available to you under a choice of one of two
1612198SEiji.Ota@Sun.COM  * licenses.  You may choose to be licensed under the terms of the GNU
1712198SEiji.Ota@Sun.COM  * General Public License (GPL) Version 2, available from the file
1812198SEiji.Ota@Sun.COM  * COPYING in the main directory of this source tree, or the
1912198SEiji.Ota@Sun.COM  * OpenIB.org BSD license below:
2012198SEiji.Ota@Sun.COM  *
2112198SEiji.Ota@Sun.COM  *     Redistribution and use in source and binary forms, with or
2212198SEiji.Ota@Sun.COM  *     without modification, are permitted provided that the following
2312198SEiji.Ota@Sun.COM  *     conditions are met:
2412198SEiji.Ota@Sun.COM  *
2512198SEiji.Ota@Sun.COM  *      - Redistributions of source code must retain the above
2612198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
2712198SEiji.Ota@Sun.COM  *        disclaimer.
2812198SEiji.Ota@Sun.COM  *
2912198SEiji.Ota@Sun.COM  *      - Redistributions in binary form must reproduce the above
3012198SEiji.Ota@Sun.COM  *        copyright notice, this list of conditions and the following
3112198SEiji.Ota@Sun.COM  *        disclaimer in the documentation and/or other materials
3212198SEiji.Ota@Sun.COM  *        provided with the distribution.
3312198SEiji.Ota@Sun.COM  *
3412198SEiji.Ota@Sun.COM  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3512198SEiji.Ota@Sun.COM  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3612198SEiji.Ota@Sun.COM  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
3712198SEiji.Ota@Sun.COM  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
3812198SEiji.Ota@Sun.COM  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
3912198SEiji.Ota@Sun.COM  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
4012198SEiji.Ota@Sun.COM  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
4112198SEiji.Ota@Sun.COM  * SOFTWARE.
4212198SEiji.Ota@Sun.COM  *
4312198SEiji.Ota@Sun.COM  */
4412198SEiji.Ota@Sun.COM #include <sys/rds.h>
4512198SEiji.Ota@Sun.COM 
4612198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3.h>
4712198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdma.h>
4812198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
4912198SEiji.Ota@Sun.COM 
5012198SEiji.Ota@Sun.COM #ifndef __lock_lint
5112198SEiji.Ota@Sun.COM static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
5212198SEiji.Ota@Sun.COM [RDSV3_EXTHDR_NONE]	= 0,
5312198SEiji.Ota@Sun.COM [RDSV3_EXTHDR_VERSION]	= sizeof (struct rdsv3_ext_header_version),
5412198SEiji.Ota@Sun.COM [RDSV3_EXTHDR_RDMA]	= sizeof (struct rdsv3_ext_header_rdma),
5512198SEiji.Ota@Sun.COM [RDSV3_EXTHDR_RDMA_DEST]	= sizeof (struct rdsv3_ext_header_rdma_dest),
5612198SEiji.Ota@Sun.COM };
5712198SEiji.Ota@Sun.COM #else
5812198SEiji.Ota@Sun.COM static unsigned int	rdsv3_exthdr_size[__RDSV3_EXTHDR_MAX] = {
5912198SEiji.Ota@Sun.COM 			0,
6012198SEiji.Ota@Sun.COM 			sizeof (struct rdsv3_ext_header_version),
6112198SEiji.Ota@Sun.COM 			sizeof (struct rdsv3_ext_header_rdma),
6212198SEiji.Ota@Sun.COM 			sizeof (struct rdsv3_ext_header_rdma_dest),
6312198SEiji.Ota@Sun.COM };
6412198SEiji.Ota@Sun.COM #endif
6512198SEiji.Ota@Sun.COM 
6612198SEiji.Ota@Sun.COM void
rdsv3_message_addref(struct rdsv3_message * rm)6712198SEiji.Ota@Sun.COM rdsv3_message_addref(struct rdsv3_message *rm)
6812198SEiji.Ota@Sun.COM {
6912198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF5("rdsv3_message_addref", "addref rm %p ref %d",
7012198SEiji.Ota@Sun.COM 	    rm, atomic_get(&rm->m_refcount));
7112198SEiji.Ota@Sun.COM 	atomic_add_32(&rm->m_refcount, 1);
7212198SEiji.Ota@Sun.COM }
7312198SEiji.Ota@Sun.COM 
7412198SEiji.Ota@Sun.COM /*
7512198SEiji.Ota@Sun.COM  * This relies on dma_map_sg() not touching sg[].page during merging.
7612198SEiji.Ota@Sun.COM  */
7712198SEiji.Ota@Sun.COM static void
rdsv3_message_purge(struct rdsv3_message * rm)7812198SEiji.Ota@Sun.COM rdsv3_message_purge(struct rdsv3_message *rm)
7912198SEiji.Ota@Sun.COM {
8012198SEiji.Ota@Sun.COM 	unsigned long i;
8112198SEiji.Ota@Sun.COM 
8212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_purge", "Enter(rm: %p)", rm);
8312198SEiji.Ota@Sun.COM 
8412198SEiji.Ota@Sun.COM 	if (test_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags))
8512198SEiji.Ota@Sun.COM 		return;
8612198SEiji.Ota@Sun.COM 
8712198SEiji.Ota@Sun.COM 	for (i = 0; i < rm->m_nents; i++) {
8812198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF5("rdsv3_message_purge", "putting data page %p\n",
8912198SEiji.Ota@Sun.COM 		    (void *)rdsv3_sg_page(&rm->m_sg[i]));
9012198SEiji.Ota@Sun.COM 		/* XXX will have to put_page for page refs */
9112198SEiji.Ota@Sun.COM 		kmem_free(rdsv3_sg_page(&rm->m_sg[i]),
9212198SEiji.Ota@Sun.COM 		    rdsv3_sg_len(&rm->m_sg[i]));
9312198SEiji.Ota@Sun.COM 	}
9412198SEiji.Ota@Sun.COM 
9512198SEiji.Ota@Sun.COM 	if (rm->m_rdma_op)
9612198SEiji.Ota@Sun.COM 		rdsv3_rdma_free_op(rm->m_rdma_op);
9712198SEiji.Ota@Sun.COM 	if (rm->m_rdma_mr) {
9812198SEiji.Ota@Sun.COM 		struct rdsv3_mr *mr = rm->m_rdma_mr;
9912198SEiji.Ota@Sun.COM 		if (mr->r_refcount == 0) {
10012198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT 0",
10112198SEiji.Ota@Sun.COM 			    "rm %p mr %p", rm, mr);
10212198SEiji.Ota@Sun.COM 			return;
10312198SEiji.Ota@Sun.COM 		}
10412198SEiji.Ota@Sun.COM 		if (mr->r_refcount == 0xdeadbeef) {
10512198SEiji.Ota@Sun.COM 			RDSV3_DPRINTF4("rdsv3_message_purge ASSERT deadbeef",
10612198SEiji.Ota@Sun.COM 			    "rm %p mr %p", rm, mr);
10712198SEiji.Ota@Sun.COM 			return;
10812198SEiji.Ota@Sun.COM 		}
10912198SEiji.Ota@Sun.COM 		if (atomic_dec_and_test(&mr->r_refcount)) {
11012198SEiji.Ota@Sun.COM 			rm->m_rdma_mr = NULL;
11112198SEiji.Ota@Sun.COM 			__rdsv3_put_mr_final(mr);
11212198SEiji.Ota@Sun.COM 		}
11312198SEiji.Ota@Sun.COM 	}
11412198SEiji.Ota@Sun.COM 
11512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_purge", "Return(rm: %p)", rm);
11612198SEiji.Ota@Sun.COM 
11712198SEiji.Ota@Sun.COM }
11812198SEiji.Ota@Sun.COM 
11912198SEiji.Ota@Sun.COM void
rdsv3_message_put(struct rdsv3_message * rm)12012198SEiji.Ota@Sun.COM rdsv3_message_put(struct rdsv3_message *rm)
12112198SEiji.Ota@Sun.COM {
12212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF5("rdsv3_message_put",
12312198SEiji.Ota@Sun.COM 	    "put rm %p ref %d\n", rm, atomic_get(&rm->m_refcount));
12412198SEiji.Ota@Sun.COM 
12512198SEiji.Ota@Sun.COM 	if (atomic_dec_and_test(&rm->m_refcount)) {
12612198SEiji.Ota@Sun.COM 		ASSERT(!list_link_active(&rm->m_sock_item));
12712198SEiji.Ota@Sun.COM 		ASSERT(!list_link_active(&rm->m_conn_item));
12812198SEiji.Ota@Sun.COM 		rdsv3_message_purge(rm);
12912198SEiji.Ota@Sun.COM 
13012198SEiji.Ota@Sun.COM 		kmem_free(rm, sizeof (struct rdsv3_message) +
13112198SEiji.Ota@Sun.COM 		    (rm->m_nents * sizeof (struct rdsv3_scatterlist)));
13212198SEiji.Ota@Sun.COM 	}
13312198SEiji.Ota@Sun.COM }
13412198SEiji.Ota@Sun.COM 
13512198SEiji.Ota@Sun.COM void
rdsv3_message_inc_free(struct rdsv3_incoming * inc)13612198SEiji.Ota@Sun.COM rdsv3_message_inc_free(struct rdsv3_incoming *inc)
13712198SEiji.Ota@Sun.COM {
13812198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm =
13912198SEiji.Ota@Sun.COM 	    container_of(inc, struct rdsv3_message, m_inc);
14012198SEiji.Ota@Sun.COM 	rdsv3_message_put(rm);
14112198SEiji.Ota@Sun.COM }
14212198SEiji.Ota@Sun.COM 
14312198SEiji.Ota@Sun.COM void
rdsv3_message_populate_header(struct rdsv3_header * hdr,uint16_be_t sport,uint16_be_t dport,uint64_t seq)14412198SEiji.Ota@Sun.COM rdsv3_message_populate_header(struct rdsv3_header *hdr, uint16_be_t sport,
14512198SEiji.Ota@Sun.COM     uint16_be_t dport, uint64_t seq)
14612198SEiji.Ota@Sun.COM {
14712198SEiji.Ota@Sun.COM 	hdr->h_flags = 0;
14812198SEiji.Ota@Sun.COM 	hdr->h_sport = sport;
14912198SEiji.Ota@Sun.COM 	hdr->h_dport = dport;
15012198SEiji.Ota@Sun.COM 	hdr->h_sequence = htonll(seq);
15112198SEiji.Ota@Sun.COM 	hdr->h_exthdr[0] = RDSV3_EXTHDR_NONE;
15212198SEiji.Ota@Sun.COM }
15312198SEiji.Ota@Sun.COM 
15412198SEiji.Ota@Sun.COM int
rdsv3_message_add_extension(struct rdsv3_header * hdr,unsigned int type,const void * data,unsigned int len)15512198SEiji.Ota@Sun.COM rdsv3_message_add_extension(struct rdsv3_header *hdr,
15612198SEiji.Ota@Sun.COM     unsigned int type, const void *data, unsigned int len)
15712198SEiji.Ota@Sun.COM {
15812198SEiji.Ota@Sun.COM 	unsigned int ext_len = sizeof (uint8_t) + len;
15912198SEiji.Ota@Sun.COM 	unsigned char *dst;
16012198SEiji.Ota@Sun.COM 
16112198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Enter");
16212198SEiji.Ota@Sun.COM 
16312198SEiji.Ota@Sun.COM 	/* For now, refuse to add more than one extension header */
16412198SEiji.Ota@Sun.COM 	if (hdr->h_exthdr[0] != RDSV3_EXTHDR_NONE)
16512198SEiji.Ota@Sun.COM 		return (0);
16612198SEiji.Ota@Sun.COM 
16712198SEiji.Ota@Sun.COM 	if (type >= __RDSV3_EXTHDR_MAX ||
16812198SEiji.Ota@Sun.COM 	    len != rdsv3_exthdr_size[type])
16912198SEiji.Ota@Sun.COM 		return (0);
17012198SEiji.Ota@Sun.COM 
17112198SEiji.Ota@Sun.COM 	if (ext_len >= RDSV3_HEADER_EXT_SPACE)
17212198SEiji.Ota@Sun.COM 		return (0);
17312198SEiji.Ota@Sun.COM 	dst = hdr->h_exthdr;
17412198SEiji.Ota@Sun.COM 
17512198SEiji.Ota@Sun.COM 	*dst++ = type;
17612198SEiji.Ota@Sun.COM 	(void) memcpy(dst, data, len);
17712198SEiji.Ota@Sun.COM 
17812198SEiji.Ota@Sun.COM 	dst[len] = RDSV3_EXTHDR_NONE;
17912198SEiji.Ota@Sun.COM 
18012198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_add_extension", "Return");
18112198SEiji.Ota@Sun.COM 	return (1);
18212198SEiji.Ota@Sun.COM }
18312198SEiji.Ota@Sun.COM 
18412198SEiji.Ota@Sun.COM /*
18512198SEiji.Ota@Sun.COM  * If a message has extension headers, retrieve them here.
18612198SEiji.Ota@Sun.COM  * Call like this:
18712198SEiji.Ota@Sun.COM  *
18812198SEiji.Ota@Sun.COM  * unsigned int pos = 0;
18912198SEiji.Ota@Sun.COM  *
19012198SEiji.Ota@Sun.COM  * while (1) {
19112198SEiji.Ota@Sun.COM  *	buflen = sizeof(buffer);
19212198SEiji.Ota@Sun.COM  *	type = rdsv3_message_next_extension(hdr, &pos, buffer, &buflen);
19312198SEiji.Ota@Sun.COM  *	if (type == RDSV3_EXTHDR_NONE)
19412198SEiji.Ota@Sun.COM  *		break;
19512198SEiji.Ota@Sun.COM  *	...
19612198SEiji.Ota@Sun.COM  * }
19712198SEiji.Ota@Sun.COM  */
19812198SEiji.Ota@Sun.COM int
rdsv3_message_next_extension(struct rdsv3_header * hdr,unsigned int * pos,void * buf,unsigned int * buflen)19912198SEiji.Ota@Sun.COM rdsv3_message_next_extension(struct rdsv3_header *hdr,
20012198SEiji.Ota@Sun.COM     unsigned int *pos, void *buf, unsigned int *buflen)
20112198SEiji.Ota@Sun.COM {
20212198SEiji.Ota@Sun.COM 	unsigned int offset, ext_type, ext_len;
20312198SEiji.Ota@Sun.COM 	uint8_t *src = hdr->h_exthdr;
20412198SEiji.Ota@Sun.COM 
20512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_next_extension", "Enter");
20612198SEiji.Ota@Sun.COM 
20712198SEiji.Ota@Sun.COM 	offset = *pos;
20812198SEiji.Ota@Sun.COM 	if (offset >= RDSV3_HEADER_EXT_SPACE)
20912198SEiji.Ota@Sun.COM 		goto none;
21012198SEiji.Ota@Sun.COM 
21112198SEiji.Ota@Sun.COM 	/*
21212198SEiji.Ota@Sun.COM 	 * Get the extension type and length. For now, the
21312198SEiji.Ota@Sun.COM 	 * length is implied by the extension type.
21412198SEiji.Ota@Sun.COM 	 */
21512198SEiji.Ota@Sun.COM 	ext_type = src[offset++];
21612198SEiji.Ota@Sun.COM 
21712198SEiji.Ota@Sun.COM 	if (ext_type == RDSV3_EXTHDR_NONE || ext_type >= __RDSV3_EXTHDR_MAX)
21812198SEiji.Ota@Sun.COM 		goto none;
21912198SEiji.Ota@Sun.COM 	ext_len = rdsv3_exthdr_size[ext_type];
22012198SEiji.Ota@Sun.COM 	if (offset + ext_len > RDSV3_HEADER_EXT_SPACE)
22112198SEiji.Ota@Sun.COM 		goto none;
22212198SEiji.Ota@Sun.COM 
22312198SEiji.Ota@Sun.COM 	*pos = offset + ext_len;
22412198SEiji.Ota@Sun.COM 	if (ext_len < *buflen)
22512198SEiji.Ota@Sun.COM 		*buflen = ext_len;
22612198SEiji.Ota@Sun.COM 	(void) memcpy(buf, src + offset, *buflen);
22712198SEiji.Ota@Sun.COM 	return (ext_type);
22812198SEiji.Ota@Sun.COM 
22912198SEiji.Ota@Sun.COM none:
23012198SEiji.Ota@Sun.COM 	*pos = RDSV3_HEADER_EXT_SPACE;
23112198SEiji.Ota@Sun.COM 	*buflen = 0;
23212198SEiji.Ota@Sun.COM 	return (RDSV3_EXTHDR_NONE);
23312198SEiji.Ota@Sun.COM }
23412198SEiji.Ota@Sun.COM 
23512198SEiji.Ota@Sun.COM int
rdsv3_message_add_version_extension(struct rdsv3_header * hdr,unsigned int version)23612198SEiji.Ota@Sun.COM rdsv3_message_add_version_extension(struct rdsv3_header *hdr,
23712198SEiji.Ota@Sun.COM     unsigned int version)
23812198SEiji.Ota@Sun.COM {
23912198SEiji.Ota@Sun.COM 	struct rdsv3_ext_header_version ext_hdr;
24012198SEiji.Ota@Sun.COM 
24112198SEiji.Ota@Sun.COM 	ext_hdr.h_version = htonl(version);
24212198SEiji.Ota@Sun.COM 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_VERSION,
24312198SEiji.Ota@Sun.COM 	    &ext_hdr, sizeof (ext_hdr)));
24412198SEiji.Ota@Sun.COM }
24512198SEiji.Ota@Sun.COM 
24612198SEiji.Ota@Sun.COM int
rdsv3_message_get_version_extension(struct rdsv3_header * hdr,unsigned int * version)24712198SEiji.Ota@Sun.COM rdsv3_message_get_version_extension(struct rdsv3_header *hdr,
24812198SEiji.Ota@Sun.COM     unsigned int *version)
24912198SEiji.Ota@Sun.COM {
25012198SEiji.Ota@Sun.COM 	struct rdsv3_ext_header_version ext_hdr;
25112198SEiji.Ota@Sun.COM 	unsigned int pos = 0, len = sizeof (ext_hdr);
25212198SEiji.Ota@Sun.COM 
25312198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_get_version_extension", "Enter");
25412198SEiji.Ota@Sun.COM 
25512198SEiji.Ota@Sun.COM 	/*
25612198SEiji.Ota@Sun.COM 	 * We assume the version extension is the only one present
25712198SEiji.Ota@Sun.COM 	 */
25812198SEiji.Ota@Sun.COM 	if (rdsv3_message_next_extension(hdr, &pos, &ext_hdr, &len) !=
25912198SEiji.Ota@Sun.COM 	    RDSV3_EXTHDR_VERSION)
26012198SEiji.Ota@Sun.COM 		return (0);
26112198SEiji.Ota@Sun.COM 	*version = ntohl(ext_hdr.h_version);
26212198SEiji.Ota@Sun.COM 	return (1);
26312198SEiji.Ota@Sun.COM }
26412198SEiji.Ota@Sun.COM 
26512198SEiji.Ota@Sun.COM int
rdsv3_message_add_rdma_dest_extension(struct rdsv3_header * hdr,uint32_t r_key,uint32_t offset)26612198SEiji.Ota@Sun.COM rdsv3_message_add_rdma_dest_extension(struct rdsv3_header *hdr, uint32_t r_key,
26712198SEiji.Ota@Sun.COM     uint32_t offset)
26812198SEiji.Ota@Sun.COM {
26912198SEiji.Ota@Sun.COM 	struct rdsv3_ext_header_rdma_dest ext_hdr;
27012198SEiji.Ota@Sun.COM 
27112198SEiji.Ota@Sun.COM 	ext_hdr.h_rdma_rkey = htonl(r_key);
27212198SEiji.Ota@Sun.COM 	ext_hdr.h_rdma_offset = htonl(offset);
27312198SEiji.Ota@Sun.COM 	return (rdsv3_message_add_extension(hdr, RDSV3_EXTHDR_RDMA_DEST,
27412198SEiji.Ota@Sun.COM 	    &ext_hdr, sizeof (ext_hdr)));
27512198SEiji.Ota@Sun.COM }
27612198SEiji.Ota@Sun.COM 
27712198SEiji.Ota@Sun.COM struct rdsv3_message *
rdsv3_message_alloc(unsigned int nents,int gfp)27812198SEiji.Ota@Sun.COM rdsv3_message_alloc(unsigned int nents, int gfp)
27912198SEiji.Ota@Sun.COM {
28012198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm;
28112198SEiji.Ota@Sun.COM 
28212198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Enter(nents: %d)", nents);
28312198SEiji.Ota@Sun.COM 
28412198SEiji.Ota@Sun.COM 	rm = kmem_zalloc(sizeof (struct rdsv3_message) +
28512198SEiji.Ota@Sun.COM 	    (nents * sizeof (struct rdsv3_scatterlist)), gfp);
28612198SEiji.Ota@Sun.COM 	if (!rm)
28712198SEiji.Ota@Sun.COM 		goto out;
28812198SEiji.Ota@Sun.COM 
28912198SEiji.Ota@Sun.COM 	rm->m_refcount = 1;
29012198SEiji.Ota@Sun.COM 	list_link_init(&rm->m_sock_item);
29112198SEiji.Ota@Sun.COM 	list_link_init(&rm->m_conn_item);
29212198SEiji.Ota@Sun.COM 	mutex_init(&rm->m_rs_lock, NULL, MUTEX_DRIVER, NULL);
29312676SEiji.Ota@Sun.COM 	rdsv3_init_waitqueue(&rm->m_flush_wait);
29412198SEiji.Ota@Sun.COM 
29512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_alloc", "Return(rm: %p)", rm);
29612198SEiji.Ota@Sun.COM out:
29712198SEiji.Ota@Sun.COM 	return (rm);
29812198SEiji.Ota@Sun.COM }
29912198SEiji.Ota@Sun.COM 
30012198SEiji.Ota@Sun.COM struct rdsv3_message *
rdsv3_message_map_pages(unsigned long * page_addrs,unsigned int total_len)30112198SEiji.Ota@Sun.COM rdsv3_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
30212198SEiji.Ota@Sun.COM {
30312198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm;
30412198SEiji.Ota@Sun.COM 	unsigned int i;
30512198SEiji.Ota@Sun.COM 
30612198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_map_pages", "Enter(len: %d)", total_len);
30712198SEiji.Ota@Sun.COM 
30812198SEiji.Ota@Sun.COM #ifndef __lock_lint
30912198SEiji.Ota@Sun.COM 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
31012198SEiji.Ota@Sun.COM #else
31112198SEiji.Ota@Sun.COM 	rm = NULL;
31212198SEiji.Ota@Sun.COM #endif
31312198SEiji.Ota@Sun.COM 	if (rm == NULL)
31412198SEiji.Ota@Sun.COM 		return (ERR_PTR(-ENOMEM));
31512198SEiji.Ota@Sun.COM 
31612198SEiji.Ota@Sun.COM 	set_bit(RDSV3_MSG_PAGEVEC, &rm->m_flags);
31712198SEiji.Ota@Sun.COM 	rm->m_inc.i_hdr.h_len = htonl(total_len);
31812198SEiji.Ota@Sun.COM #ifndef __lock_lint
31912198SEiji.Ota@Sun.COM 	rm->m_nents = ceil(total_len, PAGE_SIZE);
32012198SEiji.Ota@Sun.COM #else
32112198SEiji.Ota@Sun.COM 	rm->m_nents = 0;
32212198SEiji.Ota@Sun.COM #endif
32312198SEiji.Ota@Sun.COM 
32412198SEiji.Ota@Sun.COM 	for (i = 0; i < rm->m_nents; ++i) {
32512198SEiji.Ota@Sun.COM 		rdsv3_sg_set_page(&rm->m_sg[i],
32612198SEiji.Ota@Sun.COM 		    page_addrs[i],
32712198SEiji.Ota@Sun.COM 		    PAGE_SIZE, 0);
32812198SEiji.Ota@Sun.COM 	}
32912198SEiji.Ota@Sun.COM 
33012198SEiji.Ota@Sun.COM 	return (rm);
33112198SEiji.Ota@Sun.COM }
33212198SEiji.Ota@Sun.COM 
33312198SEiji.Ota@Sun.COM struct rdsv3_message *
rdsv3_message_copy_from_user(struct uio * uiop,size_t total_len)33412198SEiji.Ota@Sun.COM rdsv3_message_copy_from_user(struct uio *uiop,
33512198SEiji.Ota@Sun.COM     size_t total_len)
33612198SEiji.Ota@Sun.COM {
33712198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm;
33812198SEiji.Ota@Sun.COM 	struct rdsv3_scatterlist *sg;
33912198SEiji.Ota@Sun.COM 	int ret;
34012198SEiji.Ota@Sun.COM 
34112198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_copy_from_user", "Enter: %d", total_len);
34212198SEiji.Ota@Sun.COM 
34312198SEiji.Ota@Sun.COM #ifndef __lock_lint
34412198SEiji.Ota@Sun.COM 	rm = rdsv3_message_alloc(ceil(total_len, PAGE_SIZE), KM_NOSLEEP);
34512198SEiji.Ota@Sun.COM #else
34612198SEiji.Ota@Sun.COM 	rm = NULL;
34712198SEiji.Ota@Sun.COM #endif
34812198SEiji.Ota@Sun.COM 	if (rm == NULL) {
34912198SEiji.Ota@Sun.COM 		ret = -ENOMEM;
35012198SEiji.Ota@Sun.COM 		goto out;
35112198SEiji.Ota@Sun.COM 	}
35212198SEiji.Ota@Sun.COM 
35312198SEiji.Ota@Sun.COM 	rm->m_inc.i_hdr.h_len = htonl(total_len);
35412198SEiji.Ota@Sun.COM 
35512198SEiji.Ota@Sun.COM 	/*
35612198SEiji.Ota@Sun.COM 	 * now allocate and copy in the data payload.
35712198SEiji.Ota@Sun.COM 	 */
35812198SEiji.Ota@Sun.COM 	sg = rm->m_sg;
35912198SEiji.Ota@Sun.COM 
36012198SEiji.Ota@Sun.COM 	while (total_len) {
36112198SEiji.Ota@Sun.COM 		if (rdsv3_sg_page(sg) == NULL) {
36212198SEiji.Ota@Sun.COM 			ret = rdsv3_page_remainder_alloc(sg, total_len, 0);
36312198SEiji.Ota@Sun.COM 			if (ret)
36412198SEiji.Ota@Sun.COM 				goto out;
36512198SEiji.Ota@Sun.COM 			rm->m_nents++;
36612198SEiji.Ota@Sun.COM 		}
36712198SEiji.Ota@Sun.COM 
36812198SEiji.Ota@Sun.COM 		ret = uiomove(rdsv3_sg_page(sg), rdsv3_sg_len(sg), UIO_WRITE,
36912198SEiji.Ota@Sun.COM 		    uiop);
37012580SGiri.Adari@Sun.COM 		if (ret) {
37112580SGiri.Adari@Sun.COM 			RDSV3_DPRINTF2("rdsv3_message_copy_from_user",
37212580SGiri.Adari@Sun.COM 			    "uiomove failed");
37312580SGiri.Adari@Sun.COM 			ret = -ret;
37412198SEiji.Ota@Sun.COM 			goto out;
37512580SGiri.Adari@Sun.COM 		}
37612198SEiji.Ota@Sun.COM 
37712198SEiji.Ota@Sun.COM 		total_len -= rdsv3_sg_len(sg);
37812198SEiji.Ota@Sun.COM 		sg++;
37912198SEiji.Ota@Sun.COM 	}
38012198SEiji.Ota@Sun.COM 	ret = 0;
38112198SEiji.Ota@Sun.COM out:
38212198SEiji.Ota@Sun.COM 	if (ret) {
38312198SEiji.Ota@Sun.COM 		if (rm)
38412198SEiji.Ota@Sun.COM 			rdsv3_message_put(rm);
38512198SEiji.Ota@Sun.COM 		rm = ERR_PTR(ret);
38612198SEiji.Ota@Sun.COM 	}
38712198SEiji.Ota@Sun.COM 	return (rm);
38812198SEiji.Ota@Sun.COM }
38912198SEiji.Ota@Sun.COM 
39012198SEiji.Ota@Sun.COM int
rdsv3_message_inc_copy_to_user(struct rdsv3_incoming * inc,uio_t * uiop,size_t size)39112198SEiji.Ota@Sun.COM rdsv3_message_inc_copy_to_user(struct rdsv3_incoming *inc,
39212198SEiji.Ota@Sun.COM     uio_t *uiop, size_t size)
39312198SEiji.Ota@Sun.COM {
39412198SEiji.Ota@Sun.COM 	struct rdsv3_message *rm;
39512198SEiji.Ota@Sun.COM 	struct rdsv3_scatterlist *sg;
39612198SEiji.Ota@Sun.COM 	unsigned long to_copy;
39712198SEiji.Ota@Sun.COM 	unsigned long vec_off;
39812198SEiji.Ota@Sun.COM 	int copied;
39912198SEiji.Ota@Sun.COM 	int ret;
40012198SEiji.Ota@Sun.COM 	uint32_t len;
40112198SEiji.Ota@Sun.COM 
40212198SEiji.Ota@Sun.COM 	rm = container_of(inc, struct rdsv3_message, m_inc);
40312198SEiji.Ota@Sun.COM 	len = ntohl(rm->m_inc.i_hdr.h_len);
40412198SEiji.Ota@Sun.COM 
40512198SEiji.Ota@Sun.COM 	RDSV3_DPRINTF4("rdsv3_message_inc_copy_to_user",
40612198SEiji.Ota@Sun.COM 	    "Enter(rm: %p, len: %d)", rm, len);
40712198SEiji.Ota@Sun.COM 
40812198SEiji.Ota@Sun.COM 	sg = rm->m_sg;
40912198SEiji.Ota@Sun.COM 	vec_off = 0;
41012198SEiji.Ota@Sun.COM 	copied = 0;
41112198SEiji.Ota@Sun.COM 
41212198SEiji.Ota@Sun.COM 	while (copied < size && copied < len) {
41312198SEiji.Ota@Sun.COM 
41412198SEiji.Ota@Sun.COM 		to_copy = min(len - copied, sg->length - vec_off);
41512198SEiji.Ota@Sun.COM 		to_copy = min(size - copied, to_copy);
41612198SEiji.Ota@Sun.COM 
41712198SEiji.Ota@Sun.COM 		RDSV3_DPRINTF5("rdsv3_message_inc_copy_to_user",
41812198SEiji.Ota@Sun.COM 		    "copying %lu bytes to user iov %p from sg [%p, %u] + %lu\n",
41912198SEiji.Ota@Sun.COM 		    to_copy, uiop,
42012198SEiji.Ota@Sun.COM 		    rdsv3_sg_page(sg), sg->length, vec_off);
42112198SEiji.Ota@Sun.COM 
42212198SEiji.Ota@Sun.COM 		ret = uiomove(rdsv3_sg_page(sg), to_copy, UIO_READ, uiop);
42312198SEiji.Ota@Sun.COM 		if (ret)
42412198SEiji.Ota@Sun.COM 			break;
42512198SEiji.Ota@Sun.COM 
42612198SEiji.Ota@Sun.COM 		vec_off += to_copy;
42712198SEiji.Ota@Sun.COM 		copied += to_copy;
42812198SEiji.Ota@Sun.COM 
42912198SEiji.Ota@Sun.COM 		if (vec_off == sg->length) {
43012198SEiji.Ota@Sun.COM 			vec_off = 0;
43112198SEiji.Ota@Sun.COM 			sg++;
43212198SEiji.Ota@Sun.COM 		}
43312198SEiji.Ota@Sun.COM 	}
43412198SEiji.Ota@Sun.COM 
43512198SEiji.Ota@Sun.COM 	return (copied);
43612198SEiji.Ota@Sun.COM }
43712198SEiji.Ota@Sun.COM 
43812198SEiji.Ota@Sun.COM /*
43912198SEiji.Ota@Sun.COM  * If the message is still on the send queue, wait until the transport
44012198SEiji.Ota@Sun.COM  * is done with it. This is particularly important for RDMA operations.
44112198SEiji.Ota@Sun.COM  */
44212676SEiji.Ota@Sun.COM /* ARGSUSED */
44312198SEiji.Ota@Sun.COM void
rdsv3_message_wait(struct rdsv3_message * rm)44412198SEiji.Ota@Sun.COM rdsv3_message_wait(struct rdsv3_message *rm)
44512198SEiji.Ota@Sun.COM {
44612676SEiji.Ota@Sun.COM 	rdsv3_wait_event(&rm->m_flush_wait,
44712198SEiji.Ota@Sun.COM 	    !test_bit(RDSV3_MSG_MAPPED, &rm->m_flags));
44812198SEiji.Ota@Sun.COM }
44912198SEiji.Ota@Sun.COM 
45012198SEiji.Ota@Sun.COM void
rdsv3_message_unmapped(struct rdsv3_message * rm)45112198SEiji.Ota@Sun.COM rdsv3_message_unmapped(struct rdsv3_message *rm)
45212198SEiji.Ota@Sun.COM {
45312198SEiji.Ota@Sun.COM 	clear_bit(RDSV3_MSG_MAPPED, &rm->m_flags);
45412676SEiji.Ota@Sun.COM 	rdsv3_wake_up_all(&rm->m_flush_wait);
45512198SEiji.Ota@Sun.COM }
456