/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
412198SEiji.Ota@Sun.COM
/*
 * This file contains code imported from the OFED rds source file rdma.c
 * Oracle elects to have and use the contents of rdma.c under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */
1112763SGiri.Adari@Sun.COM
/*
 * Copyright (c) 2007 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
4412198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/rdma/ib_verbs.h>
4512198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/rdma/ib_addr.h>
4612198SEiji.Ota@Sun.COM #include <sys/ib/clients/of/rdma/rdma_cm.h>
4712198SEiji.Ota@Sun.COM
4812198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/ib.h>
4912198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdma.h>
5012198SEiji.Ota@Sun.COM #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
5112198SEiji.Ota@Sun.COM
/*
 * Direction selectors passed as the second argument of the transport's
 * sync_mr() hook.
 */
#define	DMA_TO_DEVICE	0
#define	DMA_FROM_DEVICE	1

/*
 * Make an AVL node its own parent.  rdsv3_destroy_mr() treats a node in
 * that state as "not linked into the key tree" and skips avl_remove().
 *
 * The expansion already ends in ';', so a call site compiles in statement
 * context with or without a terminator of its own.
 */
#define	RB_CLEAR_NODE(nodep)	AVL_SETPARENT(nodep, nodep);
5512198SEiji.Ota@Sun.COM
/*
 * XXX
 *  - build with sparse
 *  - should we limit the size of a mr region?  let transport return failure?
 *  - should we detect duplicate keys on a socket?  hmm.
 *  - an rdma is an mlock, apply rlimit?
 */
6312198SEiji.Ota@Sun.COM
6412198SEiji.Ota@Sun.COM /*
6512198SEiji.Ota@Sun.COM * get the number of pages by looking at the page indices that the start and
6612198SEiji.Ota@Sun.COM * end addresses fall in.
6712198SEiji.Ota@Sun.COM *
6812198SEiji.Ota@Sun.COM * Returns 0 if the vec is invalid. It is invalid if the number of bytes
6912198SEiji.Ota@Sun.COM * causes the address to wrap or overflows an unsigned int. This comes
7012198SEiji.Ota@Sun.COM * from being stored in the 'length' member of 'struct rdsv3_scatterlist'.
7112198SEiji.Ota@Sun.COM */
7212198SEiji.Ota@Sun.COM static unsigned int
rdsv3_pages_in_vec(struct rds_iovec * vec)7312863SEiji.Ota@Sun.COM rdsv3_pages_in_vec(struct rds_iovec *vec)
7412198SEiji.Ota@Sun.COM {
7512198SEiji.Ota@Sun.COM if ((vec->addr + vec->bytes <= vec->addr) ||
7612198SEiji.Ota@Sun.COM (vec->bytes > (uint64_t)UINT_MAX)) {
7712198SEiji.Ota@Sun.COM return (0);
7812198SEiji.Ota@Sun.COM }
7912198SEiji.Ota@Sun.COM
8012198SEiji.Ota@Sun.COM return (((vec->addr + vec->bytes + PAGESIZE - 1) >>
8112198SEiji.Ota@Sun.COM PAGESHIFT) - (vec->addr >> PAGESHIFT));
8212198SEiji.Ota@Sun.COM }
8312198SEiji.Ota@Sun.COM
8412198SEiji.Ota@Sun.COM static struct rdsv3_mr *
rdsv3_mr_tree_walk(struct avl_tree * root,uint32_t key,struct rdsv3_mr * insert)8512198SEiji.Ota@Sun.COM rdsv3_mr_tree_walk(struct avl_tree *root, uint32_t key,
8612198SEiji.Ota@Sun.COM struct rdsv3_mr *insert)
8712198SEiji.Ota@Sun.COM {
8812198SEiji.Ota@Sun.COM struct rdsv3_mr *mr;
8912198SEiji.Ota@Sun.COM avl_index_t where;
9012198SEiji.Ota@Sun.COM
9112198SEiji.Ota@Sun.COM mr = avl_find(root, &key, &where);
9212198SEiji.Ota@Sun.COM if ((mr == NULL) && (insert != NULL)) {
9312198SEiji.Ota@Sun.COM avl_insert(root, (void *)insert, where);
9412198SEiji.Ota@Sun.COM atomic_add_32(&insert->r_refcount, 1);
9512198SEiji.Ota@Sun.COM return (NULL);
9612198SEiji.Ota@Sun.COM }
9712198SEiji.Ota@Sun.COM
9812198SEiji.Ota@Sun.COM return (mr);
9912198SEiji.Ota@Sun.COM }
10012198SEiji.Ota@Sun.COM
10112198SEiji.Ota@Sun.COM /*
10212198SEiji.Ota@Sun.COM * Destroy the transport-specific part of a MR.
10312198SEiji.Ota@Sun.COM */
10412198SEiji.Ota@Sun.COM static void
rdsv3_destroy_mr(struct rdsv3_mr * mr)10512198SEiji.Ota@Sun.COM rdsv3_destroy_mr(struct rdsv3_mr *mr)
10612198SEiji.Ota@Sun.COM {
10712198SEiji.Ota@Sun.COM struct rdsv3_sock *rs = mr->r_sock;
10812198SEiji.Ota@Sun.COM void *trans_private = NULL;
10912198SEiji.Ota@Sun.COM avl_node_t *np;
11012198SEiji.Ota@Sun.COM
11112198SEiji.Ota@Sun.COM RDSV3_DPRINTF5("rdsv3_destroy_mr",
11212198SEiji.Ota@Sun.COM "RDS: destroy mr key is %x refcnt %u",
11312198SEiji.Ota@Sun.COM mr->r_key, atomic_get(&mr->r_refcount));
11412198SEiji.Ota@Sun.COM
11512198SEiji.Ota@Sun.COM if (test_and_set_bit(RDSV3_MR_DEAD, &mr->r_state))
11612198SEiji.Ota@Sun.COM return;
11712198SEiji.Ota@Sun.COM
11812198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
11912198SEiji.Ota@Sun.COM np = &mr->r_rb_node;
12012198SEiji.Ota@Sun.COM if (AVL_XPARENT(np) != np)
12112198SEiji.Ota@Sun.COM avl_remove(&rs->rs_rdma_keys, mr);
12212198SEiji.Ota@Sun.COM trans_private = mr->r_trans_private;
12312198SEiji.Ota@Sun.COM mr->r_trans_private = NULL;
12412198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
12512198SEiji.Ota@Sun.COM
12612198SEiji.Ota@Sun.COM if (trans_private)
12712198SEiji.Ota@Sun.COM mr->r_trans->free_mr(trans_private, mr->r_invalidate);
12812198SEiji.Ota@Sun.COM }
12912198SEiji.Ota@Sun.COM
13012198SEiji.Ota@Sun.COM void
__rdsv3_put_mr_final(struct rdsv3_mr * mr)13112198SEiji.Ota@Sun.COM __rdsv3_put_mr_final(struct rdsv3_mr *mr)
13212198SEiji.Ota@Sun.COM {
13312198SEiji.Ota@Sun.COM rdsv3_destroy_mr(mr);
13412198SEiji.Ota@Sun.COM kmem_free(mr, sizeof (*mr));
13512198SEiji.Ota@Sun.COM }
13612198SEiji.Ota@Sun.COM
13712198SEiji.Ota@Sun.COM /*
13812198SEiji.Ota@Sun.COM * By the time this is called we can't have any more ioctls called on
13912198SEiji.Ota@Sun.COM * the socket so we don't need to worry about racing with others.
14012198SEiji.Ota@Sun.COM */
14112198SEiji.Ota@Sun.COM void
rdsv3_rdma_drop_keys(struct rdsv3_sock * rs)14212198SEiji.Ota@Sun.COM rdsv3_rdma_drop_keys(struct rdsv3_sock *rs)
14312198SEiji.Ota@Sun.COM {
14412198SEiji.Ota@Sun.COM struct rdsv3_mr *mr;
14512198SEiji.Ota@Sun.COM struct avl_node *node;
14612198SEiji.Ota@Sun.COM
14712198SEiji.Ota@Sun.COM /* Release any MRs associated with this socket */
14812198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
14912198SEiji.Ota@Sun.COM while ((node = avl_first(&rs->rs_rdma_keys))) {
15012198SEiji.Ota@Sun.COM mr = container_of(node, struct rdsv3_mr, r_rb_node);
15112198SEiji.Ota@Sun.COM if (mr->r_trans == rs->rs_transport)
15212198SEiji.Ota@Sun.COM mr->r_invalidate = 0;
15312198SEiji.Ota@Sun.COM avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
15412198SEiji.Ota@Sun.COM RB_CLEAR_NODE(&mr->r_rb_node)
15512198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
15612198SEiji.Ota@Sun.COM rdsv3_destroy_mr(mr);
15712198SEiji.Ota@Sun.COM rdsv3_mr_put(mr);
15812198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
15912198SEiji.Ota@Sun.COM }
16012198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
16112198SEiji.Ota@Sun.COM
16212198SEiji.Ota@Sun.COM if (rs->rs_transport && rs->rs_transport->flush_mrs)
16312198SEiji.Ota@Sun.COM rs->rs_transport->flush_mrs();
16412198SEiji.Ota@Sun.COM }
16512198SEiji.Ota@Sun.COM
16612198SEiji.Ota@Sun.COM static int
__rdsv3_rdma_map(struct rdsv3_sock * rs,struct rds_get_mr_args * args,uint64_t * cookie_ret,struct rdsv3_mr ** mr_ret)16712863SEiji.Ota@Sun.COM __rdsv3_rdma_map(struct rdsv3_sock *rs, struct rds_get_mr_args *args,
16812198SEiji.Ota@Sun.COM uint64_t *cookie_ret, struct rdsv3_mr **mr_ret)
16912198SEiji.Ota@Sun.COM {
17012198SEiji.Ota@Sun.COM struct rdsv3_mr *mr = NULL, *found;
17112198SEiji.Ota@Sun.COM void *trans_private;
17212863SEiji.Ota@Sun.COM rds_rdma_cookie_t cookie;
17312198SEiji.Ota@Sun.COM unsigned int nents = 0;
17412198SEiji.Ota@Sun.COM int ret;
17512198SEiji.Ota@Sun.COM
17612198SEiji.Ota@Sun.COM if (rs->rs_bound_addr == 0) {
17712198SEiji.Ota@Sun.COM ret = -ENOTCONN; /* XXX not a great errno */
17812198SEiji.Ota@Sun.COM goto out;
17912198SEiji.Ota@Sun.COM }
18012198SEiji.Ota@Sun.COM
18112676SEiji.Ota@Sun.COM if (!rs->rs_transport->get_mr) {
18212198SEiji.Ota@Sun.COM ret = -EOPNOTSUPP;
18312198SEiji.Ota@Sun.COM goto out;
18412198SEiji.Ota@Sun.COM }
18512198SEiji.Ota@Sun.COM
18612198SEiji.Ota@Sun.COM mr = kmem_zalloc(sizeof (struct rdsv3_mr), KM_NOSLEEP);
18712676SEiji.Ota@Sun.COM if (!mr) {
18812198SEiji.Ota@Sun.COM ret = -ENOMEM;
18912198SEiji.Ota@Sun.COM goto out;
19012198SEiji.Ota@Sun.COM }
19112198SEiji.Ota@Sun.COM
19212198SEiji.Ota@Sun.COM mr->r_refcount = 1;
19312198SEiji.Ota@Sun.COM RB_CLEAR_NODE(&mr->r_rb_node);
19412198SEiji.Ota@Sun.COM mr->r_trans = rs->rs_transport;
19512198SEiji.Ota@Sun.COM mr->r_sock = rs;
19612198SEiji.Ota@Sun.COM
19712863SEiji.Ota@Sun.COM if (args->flags & RDS_RDMA_USE_ONCE)
19812198SEiji.Ota@Sun.COM mr->r_use_once = 1;
19912863SEiji.Ota@Sun.COM if (args->flags & RDS_RDMA_INVALIDATE)
20012198SEiji.Ota@Sun.COM mr->r_invalidate = 1;
20112863SEiji.Ota@Sun.COM if (args->flags & RDS_RDMA_READWRITE)
20212198SEiji.Ota@Sun.COM mr->r_write = 1;
20312198SEiji.Ota@Sun.COM
20412198SEiji.Ota@Sun.COM /*
20512198SEiji.Ota@Sun.COM * Obtain a transport specific MR. If this succeeds, the
20612198SEiji.Ota@Sun.COM * s/g list is now owned by the MR.
20712198SEiji.Ota@Sun.COM * Note that dma_map() implies that pending writes are
20812198SEiji.Ota@Sun.COM * flushed to RAM, so no dma_sync is needed here.
20912198SEiji.Ota@Sun.COM */
21012198SEiji.Ota@Sun.COM trans_private = rs->rs_transport->get_mr(&args->vec, nents, rs,
21112198SEiji.Ota@Sun.COM &mr->r_key);
21212198SEiji.Ota@Sun.COM
21312198SEiji.Ota@Sun.COM if (IS_ERR(trans_private)) {
21412198SEiji.Ota@Sun.COM ret = PTR_ERR(trans_private);
21512198SEiji.Ota@Sun.COM goto out;
21612198SEiji.Ota@Sun.COM }
21712198SEiji.Ota@Sun.COM
21812198SEiji.Ota@Sun.COM mr->r_trans_private = trans_private;
21912198SEiji.Ota@Sun.COM
22012198SEiji.Ota@Sun.COM /*
22112198SEiji.Ota@Sun.COM * The user may pass us an unaligned address, but we can only
22212198SEiji.Ota@Sun.COM * map page aligned regions. So we keep the offset, and build
22312198SEiji.Ota@Sun.COM * a 64bit cookie containing <R_Key, offset> and pass that
22412198SEiji.Ota@Sun.COM * around.
22512198SEiji.Ota@Sun.COM */
22612198SEiji.Ota@Sun.COM cookie = rdsv3_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGEMASK);
22712198SEiji.Ota@Sun.COM if (cookie_ret)
22812198SEiji.Ota@Sun.COM *cookie_ret = cookie;
22912198SEiji.Ota@Sun.COM
23012198SEiji.Ota@Sun.COM /*
23112198SEiji.Ota@Sun.COM * copy value of cookie to user address at args->cookie_addr
23212198SEiji.Ota@Sun.COM */
23312198SEiji.Ota@Sun.COM if (args->cookie_addr) {
23412198SEiji.Ota@Sun.COM ret = ddi_copyout((void *)&cookie,
23512198SEiji.Ota@Sun.COM (void *)((intptr_t)args->cookie_addr),
23612863SEiji.Ota@Sun.COM sizeof (rds_rdma_cookie_t), 0);
23712198SEiji.Ota@Sun.COM if (ret != 0) {
23812198SEiji.Ota@Sun.COM ret = -EFAULT;
23912198SEiji.Ota@Sun.COM goto out;
24012198SEiji.Ota@Sun.COM }
24112198SEiji.Ota@Sun.COM }
24212198SEiji.Ota@Sun.COM
24312198SEiji.Ota@Sun.COM RDSV3_DPRINTF5("__rdsv3_rdma_map",
24412198SEiji.Ota@Sun.COM "RDS: get_mr mr 0x%p addr 0x%llx key 0x%x",
24512198SEiji.Ota@Sun.COM mr, args->vec.addr, mr->r_key);
24612198SEiji.Ota@Sun.COM /*
24712198SEiji.Ota@Sun.COM * Inserting the new MR into the rbtree bumps its
24812198SEiji.Ota@Sun.COM * reference count.
24912198SEiji.Ota@Sun.COM */
25012198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
25112198SEiji.Ota@Sun.COM found = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
25212198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
25312198SEiji.Ota@Sun.COM
25412198SEiji.Ota@Sun.COM ASSERT(!(found && found != mr));
25512198SEiji.Ota@Sun.COM
25612198SEiji.Ota@Sun.COM if (mr_ret) {
25712198SEiji.Ota@Sun.COM atomic_add_32(&mr->r_refcount, 1);
25812198SEiji.Ota@Sun.COM *mr_ret = mr;
25912198SEiji.Ota@Sun.COM }
26012198SEiji.Ota@Sun.COM
26112198SEiji.Ota@Sun.COM ret = 0;
26212198SEiji.Ota@Sun.COM out:
26312198SEiji.Ota@Sun.COM if (mr)
26412198SEiji.Ota@Sun.COM rdsv3_mr_put(mr);
26512198SEiji.Ota@Sun.COM return (ret);
26612198SEiji.Ota@Sun.COM }
26712198SEiji.Ota@Sun.COM
26812198SEiji.Ota@Sun.COM int
rdsv3_get_mr(struct rdsv3_sock * rs,const void * optval,int optlen)26912198SEiji.Ota@Sun.COM rdsv3_get_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
27012198SEiji.Ota@Sun.COM {
27112863SEiji.Ota@Sun.COM struct rds_get_mr_args args;
27212198SEiji.Ota@Sun.COM
27312863SEiji.Ota@Sun.COM if (optlen != sizeof (struct rds_get_mr_args))
27412198SEiji.Ota@Sun.COM return (-EINVAL);
27512198SEiji.Ota@Sun.COM
27612198SEiji.Ota@Sun.COM #if 1
27712863SEiji.Ota@Sun.COM bcopy((struct rds_get_mr_args *)optval, &args,
27812863SEiji.Ota@Sun.COM sizeof (struct rds_get_mr_args));
27912198SEiji.Ota@Sun.COM #else
28012198SEiji.Ota@Sun.COM if (ddi_copyin(optval, &args, optlen, 0))
28112198SEiji.Ota@Sun.COM return (-EFAULT);
28212198SEiji.Ota@Sun.COM #endif
28312198SEiji.Ota@Sun.COM
28412198SEiji.Ota@Sun.COM return (__rdsv3_rdma_map(rs, &args, NULL, NULL));
28512198SEiji.Ota@Sun.COM }
28612198SEiji.Ota@Sun.COM
28712414SEiji.Ota@Sun.COM int
rdsv3_get_mr_for_dest(struct rdsv3_sock * rs,const void * optval,int optlen)28812414SEiji.Ota@Sun.COM rdsv3_get_mr_for_dest(struct rdsv3_sock *rs, const void *optval,
28912414SEiji.Ota@Sun.COM int optlen)
29012414SEiji.Ota@Sun.COM {
29112863SEiji.Ota@Sun.COM struct rds_get_mr_for_dest_args args;
29212863SEiji.Ota@Sun.COM struct rds_get_mr_args new_args;
29312414SEiji.Ota@Sun.COM
29412863SEiji.Ota@Sun.COM if (optlen != sizeof (struct rds_get_mr_for_dest_args))
29512414SEiji.Ota@Sun.COM return (-EINVAL);
29612414SEiji.Ota@Sun.COM
29712414SEiji.Ota@Sun.COM #if 1
29812863SEiji.Ota@Sun.COM bcopy((struct rds_get_mr_for_dest_args *)optval, &args,
29912863SEiji.Ota@Sun.COM sizeof (struct rds_get_mr_for_dest_args));
30012414SEiji.Ota@Sun.COM #else
30112414SEiji.Ota@Sun.COM if (ddi_copyin(optval, &args, optlen, 0))
30212414SEiji.Ota@Sun.COM return (-EFAULT);
30312414SEiji.Ota@Sun.COM #endif
30412414SEiji.Ota@Sun.COM
30512414SEiji.Ota@Sun.COM /*
30612414SEiji.Ota@Sun.COM * Initially, just behave like get_mr().
30712414SEiji.Ota@Sun.COM * TODO: Implement get_mr as wrapper around this
30812414SEiji.Ota@Sun.COM * and deprecate it.
30912414SEiji.Ota@Sun.COM */
31012414SEiji.Ota@Sun.COM new_args.vec = args.vec;
31112414SEiji.Ota@Sun.COM new_args.cookie_addr = args.cookie_addr;
31212414SEiji.Ota@Sun.COM new_args.flags = args.flags;
31312414SEiji.Ota@Sun.COM
31412414SEiji.Ota@Sun.COM return (__rdsv3_rdma_map(rs, &new_args, NULL, NULL));
31512414SEiji.Ota@Sun.COM }
31612414SEiji.Ota@Sun.COM
31712198SEiji.Ota@Sun.COM /*
31812198SEiji.Ota@Sun.COM * Free the MR indicated by the given R_Key
31912198SEiji.Ota@Sun.COM */
32012198SEiji.Ota@Sun.COM int
rdsv3_free_mr(struct rdsv3_sock * rs,const void * optval,int optlen)32112198SEiji.Ota@Sun.COM rdsv3_free_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
32212198SEiji.Ota@Sun.COM {
32312863SEiji.Ota@Sun.COM struct rds_free_mr_args args;
32412198SEiji.Ota@Sun.COM struct rdsv3_mr *mr;
32512198SEiji.Ota@Sun.COM
32612863SEiji.Ota@Sun.COM if (optlen != sizeof (struct rds_free_mr_args))
32712198SEiji.Ota@Sun.COM return (-EINVAL);
32812198SEiji.Ota@Sun.COM
32912198SEiji.Ota@Sun.COM #if 1
33012863SEiji.Ota@Sun.COM bcopy((struct rds_free_mr_args *)optval, &args,
33112863SEiji.Ota@Sun.COM sizeof (struct rds_free_mr_args));
33212198SEiji.Ota@Sun.COM #else
33312863SEiji.Ota@Sun.COM if (ddi_copyin((struct rds_free_mr_args *)optval, &args,
33412863SEiji.Ota@Sun.COM sizeof (struct rds_free_mr_args), 0))
33512198SEiji.Ota@Sun.COM return (-EFAULT);
33612198SEiji.Ota@Sun.COM #endif
33712198SEiji.Ota@Sun.COM
33812198SEiji.Ota@Sun.COM /* Special case - a null cookie means flush all unused MRs */
33912198SEiji.Ota@Sun.COM if (args.cookie == 0) {
34012198SEiji.Ota@Sun.COM if (!rs->rs_transport || !rs->rs_transport->flush_mrs)
34112198SEiji.Ota@Sun.COM return (-EINVAL);
34212198SEiji.Ota@Sun.COM rs->rs_transport->flush_mrs();
34312198SEiji.Ota@Sun.COM return (0);
34412198SEiji.Ota@Sun.COM }
34512198SEiji.Ota@Sun.COM
34612198SEiji.Ota@Sun.COM /*
34712198SEiji.Ota@Sun.COM * Look up the MR given its R_key and remove it from the rbtree
34812198SEiji.Ota@Sun.COM * so nobody else finds it.
34912198SEiji.Ota@Sun.COM * This should also prevent races with rdsv3_rdma_unuse.
35012198SEiji.Ota@Sun.COM */
35112198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
35212198SEiji.Ota@Sun.COM mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys,
35312198SEiji.Ota@Sun.COM rdsv3_rdma_cookie_key(args.cookie), NULL);
35412198SEiji.Ota@Sun.COM if (mr) {
35512198SEiji.Ota@Sun.COM avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
35612198SEiji.Ota@Sun.COM RB_CLEAR_NODE(&mr->r_rb_node);
35712863SEiji.Ota@Sun.COM if (args.flags & RDS_RDMA_INVALIDATE)
35812198SEiji.Ota@Sun.COM mr->r_invalidate = 1;
35912198SEiji.Ota@Sun.COM }
36012198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
36112198SEiji.Ota@Sun.COM
36212198SEiji.Ota@Sun.COM if (!mr)
36312198SEiji.Ota@Sun.COM return (-EINVAL);
36412198SEiji.Ota@Sun.COM
36512198SEiji.Ota@Sun.COM /*
36612198SEiji.Ota@Sun.COM * call rdsv3_destroy_mr() ourselves so that we're sure it's done
36712198SEiji.Ota@Sun.COM * by time we return. If we let rdsv3_mr_put() do it it might not
36812198SEiji.Ota@Sun.COM * happen until someone else drops their ref.
36912198SEiji.Ota@Sun.COM */
37012198SEiji.Ota@Sun.COM rdsv3_destroy_mr(mr);
37112198SEiji.Ota@Sun.COM rdsv3_mr_put(mr);
37212198SEiji.Ota@Sun.COM return (0);
37312198SEiji.Ota@Sun.COM }
37412198SEiji.Ota@Sun.COM
37512198SEiji.Ota@Sun.COM /*
37612198SEiji.Ota@Sun.COM * This is called when we receive an extension header that
37712198SEiji.Ota@Sun.COM * tells us this MR was used. It allows us to implement
37812198SEiji.Ota@Sun.COM * use_once semantics
37912198SEiji.Ota@Sun.COM */
38012198SEiji.Ota@Sun.COM void
rdsv3_rdma_unuse(struct rdsv3_sock * rs,uint32_t r_key,int force)38112198SEiji.Ota@Sun.COM rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force)
38212198SEiji.Ota@Sun.COM {
38312198SEiji.Ota@Sun.COM struct rdsv3_mr *mr;
38412198SEiji.Ota@Sun.COM int zot_me = 0;
38512198SEiji.Ota@Sun.COM
38612198SEiji.Ota@Sun.COM RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Enter rkey: 0x%x", r_key);
38712198SEiji.Ota@Sun.COM
38812198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
38912198SEiji.Ota@Sun.COM mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
39012676SEiji.Ota@Sun.COM if (!mr) {
39112676SEiji.Ota@Sun.COM RDSV3_DPRINTF4("rdsv3_rdma_unuse",
39212676SEiji.Ota@Sun.COM "rdsv3: trying to unuse MR with unknown r_key %u!", r_key);
39312676SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
39412676SEiji.Ota@Sun.COM return;
39512676SEiji.Ota@Sun.COM }
39612676SEiji.Ota@Sun.COM
39712676SEiji.Ota@Sun.COM if (mr->r_use_once || force) {
39812198SEiji.Ota@Sun.COM avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
39912198SEiji.Ota@Sun.COM RB_CLEAR_NODE(&mr->r_rb_node);
40012198SEiji.Ota@Sun.COM zot_me = 1;
401*12991SEiji.Ota@Sun.COM } else {
402*12991SEiji.Ota@Sun.COM atomic_add_32(&mr->r_refcount, 1);
40312676SEiji.Ota@Sun.COM }
40412198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
40512198SEiji.Ota@Sun.COM
40612198SEiji.Ota@Sun.COM /*
40712198SEiji.Ota@Sun.COM * May have to issue a dma_sync on this memory region.
40812198SEiji.Ota@Sun.COM * Note we could avoid this if the operation was a RDMA READ,
40912198SEiji.Ota@Sun.COM * but at this point we can't tell.
41012198SEiji.Ota@Sun.COM */
41112676SEiji.Ota@Sun.COM if (mr->r_trans->sync_mr)
41212676SEiji.Ota@Sun.COM mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
41312198SEiji.Ota@Sun.COM
41412676SEiji.Ota@Sun.COM /*
41512676SEiji.Ota@Sun.COM * If the MR was marked as invalidate, this will
41612676SEiji.Ota@Sun.COM * trigger an async flush.
41712676SEiji.Ota@Sun.COM */
41812676SEiji.Ota@Sun.COM if (zot_me)
41912676SEiji.Ota@Sun.COM rdsv3_destroy_mr(mr);
42012676SEiji.Ota@Sun.COM rdsv3_mr_put(mr);
42112198SEiji.Ota@Sun.COM RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Return");
42212198SEiji.Ota@Sun.COM }
42312198SEiji.Ota@Sun.COM
42412198SEiji.Ota@Sun.COM void
rdsv3_rdma_free_op(struct rdsv3_rdma_op * ro)42512198SEiji.Ota@Sun.COM rdsv3_rdma_free_op(struct rdsv3_rdma_op *ro)
42612198SEiji.Ota@Sun.COM {
42712198SEiji.Ota@Sun.COM unsigned int i;
42812198SEiji.Ota@Sun.COM
42912198SEiji.Ota@Sun.COM /* deallocate RDMA resources on rdsv3_message */
43012198SEiji.Ota@Sun.COM for (i = 0; i < ro->r_nents; i++) {
43112198SEiji.Ota@Sun.COM ddi_umem_unlock(ro->r_rdma_sg[i].umem_cookie);
43212198SEiji.Ota@Sun.COM }
43312198SEiji.Ota@Sun.COM
43412198SEiji.Ota@Sun.COM if (ro->r_notifier)
43512198SEiji.Ota@Sun.COM kmem_free(ro->r_notifier, sizeof (*ro->r_notifier));
43612198SEiji.Ota@Sun.COM kmem_free(ro, sizeof (*ro));
43712198SEiji.Ota@Sun.COM }
43812198SEiji.Ota@Sun.COM
43912198SEiji.Ota@Sun.COM /*
44012198SEiji.Ota@Sun.COM * args is a pointer to an in-kernel copy in the sendmsg cmsg.
44112198SEiji.Ota@Sun.COM */
44212198SEiji.Ota@Sun.COM static struct rdsv3_rdma_op *
rdsv3_rdma_prepare(struct rdsv3_sock * rs,struct rds_rdma_args * args)44312863SEiji.Ota@Sun.COM rdsv3_rdma_prepare(struct rdsv3_sock *rs, struct rds_rdma_args *args)
44412198SEiji.Ota@Sun.COM {
44512863SEiji.Ota@Sun.COM struct rds_iovec vec;
44612198SEiji.Ota@Sun.COM struct rdsv3_rdma_op *op = NULL;
44712198SEiji.Ota@Sun.COM unsigned int nr_bytes;
44812863SEiji.Ota@Sun.COM struct rds_iovec *local_vec;
44912198SEiji.Ota@Sun.COM unsigned int nr;
45012198SEiji.Ota@Sun.COM unsigned int i;
45112198SEiji.Ota@Sun.COM ddi_umem_cookie_t umem_cookie;
45212198SEiji.Ota@Sun.COM size_t umem_len;
45312198SEiji.Ota@Sun.COM caddr_t umem_addr;
45412198SEiji.Ota@Sun.COM int ret;
45512198SEiji.Ota@Sun.COM
45612198SEiji.Ota@Sun.COM if (rs->rs_bound_addr == 0) {
45712198SEiji.Ota@Sun.COM ret = -ENOTCONN; /* XXX not a great errno */
45812198SEiji.Ota@Sun.COM goto out;
45912198SEiji.Ota@Sun.COM }
46012198SEiji.Ota@Sun.COM
46112198SEiji.Ota@Sun.COM if (args->nr_local > (uint64_t)UINT_MAX) {
46212198SEiji.Ota@Sun.COM ret = -EMSGSIZE;
46312198SEiji.Ota@Sun.COM goto out;
46412198SEiji.Ota@Sun.COM }
46512198SEiji.Ota@Sun.COM
46612198SEiji.Ota@Sun.COM op = kmem_zalloc(offsetof(struct rdsv3_rdma_op,
46712198SEiji.Ota@Sun.COM r_rdma_sg[args->nr_local]), KM_NOSLEEP);
46812198SEiji.Ota@Sun.COM if (op == NULL) {
46912198SEiji.Ota@Sun.COM ret = -ENOMEM;
47012198SEiji.Ota@Sun.COM goto out;
47112198SEiji.Ota@Sun.COM }
47212198SEiji.Ota@Sun.COM
47312863SEiji.Ota@Sun.COM op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
47412863SEiji.Ota@Sun.COM op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
47512863SEiji.Ota@Sun.COM op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
47612198SEiji.Ota@Sun.COM op->r_recverr = rs->rs_recverr;
47712198SEiji.Ota@Sun.COM
47812198SEiji.Ota@Sun.COM if (op->r_notify || op->r_recverr) {
47912198SEiji.Ota@Sun.COM /*
48012198SEiji.Ota@Sun.COM * We allocate an uninitialized notifier here, because
48112198SEiji.Ota@Sun.COM * we don't want to do that in the completion handler. We
48212198SEiji.Ota@Sun.COM * would have to use GFP_ATOMIC there, and don't want to deal
48312198SEiji.Ota@Sun.COM * with failed allocations.
48412198SEiji.Ota@Sun.COM */
48512198SEiji.Ota@Sun.COM op->r_notifier = kmem_alloc(sizeof (struct rdsv3_notifier),
48612198SEiji.Ota@Sun.COM KM_NOSLEEP);
48712198SEiji.Ota@Sun.COM if (!op->r_notifier) {
48812198SEiji.Ota@Sun.COM ret = -ENOMEM;
48912198SEiji.Ota@Sun.COM goto out;
49012198SEiji.Ota@Sun.COM }
49112198SEiji.Ota@Sun.COM op->r_notifier->n_user_token = args->user_token;
49212863SEiji.Ota@Sun.COM op->r_notifier->n_status = RDS_RDMA_SUCCESS;
49312198SEiji.Ota@Sun.COM }
49412198SEiji.Ota@Sun.COM
49512198SEiji.Ota@Sun.COM /*
49612198SEiji.Ota@Sun.COM * The cookie contains the R_Key of the remote memory region, and
49712198SEiji.Ota@Sun.COM * optionally an offset into it. This is how we implement RDMA into
49812198SEiji.Ota@Sun.COM * unaligned memory.
49912198SEiji.Ota@Sun.COM * When setting up the RDMA, we need to add that offset to the
50012198SEiji.Ota@Sun.COM * destination address (which is really an offset into the MR)
50112198SEiji.Ota@Sun.COM * FIXME: We may want to move this into ib_rdma.c
50212198SEiji.Ota@Sun.COM */
50312198SEiji.Ota@Sun.COM op->r_key = rdsv3_rdma_cookie_key(args->cookie);
50412198SEiji.Ota@Sun.COM op->r_remote_addr = args->remote_vec.addr +
50512198SEiji.Ota@Sun.COM rdsv3_rdma_cookie_offset(args->cookie);
50612198SEiji.Ota@Sun.COM
50712198SEiji.Ota@Sun.COM nr_bytes = 0;
50812198SEiji.Ota@Sun.COM
50912198SEiji.Ota@Sun.COM RDSV3_DPRINTF5("rdsv3_rdma_prepare",
51012198SEiji.Ota@Sun.COM "RDS: rdma prepare nr_local %llu rva %llx rkey %x",
51112198SEiji.Ota@Sun.COM (unsigned long long)args->nr_local,
51212198SEiji.Ota@Sun.COM (unsigned long long)args->remote_vec.addr,
51312198SEiji.Ota@Sun.COM op->r_key);
51412198SEiji.Ota@Sun.COM
51512863SEiji.Ota@Sun.COM local_vec = (struct rds_iovec *)(unsigned long) args->local_vec_addr;
51612198SEiji.Ota@Sun.COM
51712198SEiji.Ota@Sun.COM /* pin the scatter list of user buffers */
51812198SEiji.Ota@Sun.COM for (i = 0; i < args->nr_local; i++) {
51912198SEiji.Ota@Sun.COM if (ddi_copyin(&local_vec[i], &vec,
52012863SEiji.Ota@Sun.COM sizeof (struct rds_iovec), 0)) {
52112198SEiji.Ota@Sun.COM ret = -EFAULT;
52212198SEiji.Ota@Sun.COM goto out;
52312198SEiji.Ota@Sun.COM }
52412198SEiji.Ota@Sun.COM
52512198SEiji.Ota@Sun.COM nr = rdsv3_pages_in_vec(&vec);
52612198SEiji.Ota@Sun.COM if (nr == 0) {
52712198SEiji.Ota@Sun.COM RDSV3_DPRINTF2("rdsv3_rdma_prepare",
52812198SEiji.Ota@Sun.COM "rdsv3_pages_in_vec returned 0");
52912198SEiji.Ota@Sun.COM ret = -EINVAL;
53012198SEiji.Ota@Sun.COM goto out;
53112198SEiji.Ota@Sun.COM }
53212198SEiji.Ota@Sun.COM
53312198SEiji.Ota@Sun.COM rs->rs_user_addr = vec.addr;
53412198SEiji.Ota@Sun.COM rs->rs_user_bytes = vec.bytes;
53512198SEiji.Ota@Sun.COM
53612198SEiji.Ota@Sun.COM /* pin user memory pages */
53712198SEiji.Ota@Sun.COM umem_len = ptob(btopr(vec.bytes +
53812198SEiji.Ota@Sun.COM ((uintptr_t)vec.addr & PAGEOFFSET)));
53912198SEiji.Ota@Sun.COM umem_addr = (caddr_t)((uintptr_t)vec.addr & ~PAGEOFFSET);
54012444SGiri.Adari@Sun.COM ret = umem_lockmemory(umem_addr, umem_len,
54112444SGiri.Adari@Sun.COM DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ,
54212444SGiri.Adari@Sun.COM &umem_cookie, NULL, NULL);
54312198SEiji.Ota@Sun.COM if (ret != 0) {
54412198SEiji.Ota@Sun.COM RDSV3_DPRINTF2("rdsv3_rdma_prepare",
54512198SEiji.Ota@Sun.COM "umem_lockmemory() returned %d", ret);
54612198SEiji.Ota@Sun.COM ret = -EFAULT;
54712198SEiji.Ota@Sun.COM goto out;
54812198SEiji.Ota@Sun.COM }
54912198SEiji.Ota@Sun.COM op->r_rdma_sg[i].umem_cookie = umem_cookie;
55012198SEiji.Ota@Sun.COM op->r_rdma_sg[i].iovec = vec;
55112198SEiji.Ota@Sun.COM nr_bytes += vec.bytes;
55212198SEiji.Ota@Sun.COM
55312198SEiji.Ota@Sun.COM RDSV3_DPRINTF5("rdsv3_rdma_prepare",
55412198SEiji.Ota@Sun.COM "RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx",
55512198SEiji.Ota@Sun.COM nr_bytes, nr, vec.bytes, vec.addr);
55612198SEiji.Ota@Sun.COM }
55712198SEiji.Ota@Sun.COM op->r_nents = i;
55812198SEiji.Ota@Sun.COM
55912198SEiji.Ota@Sun.COM if (nr_bytes > args->remote_vec.bytes) {
56012198SEiji.Ota@Sun.COM RDSV3_DPRINTF2("rdsv3_rdma_prepare",
56112198SEiji.Ota@Sun.COM "RDS nr_bytes %u remote_bytes %u do not match",
56212198SEiji.Ota@Sun.COM nr_bytes, (unsigned int) args->remote_vec.bytes);
56312198SEiji.Ota@Sun.COM ret = -EINVAL;
56412198SEiji.Ota@Sun.COM goto out;
56512198SEiji.Ota@Sun.COM }
56612198SEiji.Ota@Sun.COM op->r_bytes = nr_bytes;
56712198SEiji.Ota@Sun.COM
56812198SEiji.Ota@Sun.COM ret = 0;
56912198SEiji.Ota@Sun.COM out:
57012198SEiji.Ota@Sun.COM if (ret) {
57112198SEiji.Ota@Sun.COM if (op)
57212198SEiji.Ota@Sun.COM rdsv3_rdma_free_op(op);
57312198SEiji.Ota@Sun.COM op = ERR_PTR(ret);
57412198SEiji.Ota@Sun.COM }
57512198SEiji.Ota@Sun.COM return (op);
57612198SEiji.Ota@Sun.COM }
57712198SEiji.Ota@Sun.COM
578*12991SEiji.Ota@Sun.COM #define CEIL(x, y) (((x) + (y) - 1) / (y))
579*12991SEiji.Ota@Sun.COM
58012198SEiji.Ota@Sun.COM /*
58112198SEiji.Ota@Sun.COM * The application asks for a RDMA transfer.
58212198SEiji.Ota@Sun.COM * Extract all arguments and set up the rdma_op
58312198SEiji.Ota@Sun.COM */
58412198SEiji.Ota@Sun.COM int
rdsv3_cmsg_rdma_args(struct rdsv3_sock * rs,struct rdsv3_message * rm,struct cmsghdr * cmsg)58512198SEiji.Ota@Sun.COM rdsv3_cmsg_rdma_args(struct rdsv3_sock *rs, struct rdsv3_message *rm,
58612198SEiji.Ota@Sun.COM struct cmsghdr *cmsg)
58712198SEiji.Ota@Sun.COM {
58812198SEiji.Ota@Sun.COM struct rdsv3_rdma_op *op;
58912966SEiji.Ota@Sun.COM /* uint64_t alignment on the buffer */
590*12991SEiji.Ota@Sun.COM uint64_t buf[CEIL(CMSG_LEN(sizeof (struct rds_rdma_args)),
59112966SEiji.Ota@Sun.COM sizeof (uint64_t))];
59212198SEiji.Ota@Sun.COM
59312966SEiji.Ota@Sun.COM if (cmsg->cmsg_len != CMSG_LEN(sizeof (struct rds_rdma_args)) ||
59412198SEiji.Ota@Sun.COM rm->m_rdma_op != NULL)
59512198SEiji.Ota@Sun.COM return (-EINVAL);
59612198SEiji.Ota@Sun.COM
59712966SEiji.Ota@Sun.COM ASSERT(sizeof (buf) >= cmsg->cmsg_len && ((uintptr_t)buf & 0x7) == 0);
59812966SEiji.Ota@Sun.COM
59912966SEiji.Ota@Sun.COM bcopy(CMSG_DATA(cmsg), (char *)buf, cmsg->cmsg_len);
60012966SEiji.Ota@Sun.COM op = rdsv3_rdma_prepare(rs, (struct rds_rdma_args *)buf);
60112966SEiji.Ota@Sun.COM
60212198SEiji.Ota@Sun.COM if (IS_ERR(op))
60312198SEiji.Ota@Sun.COM return (PTR_ERR(op));
60412198SEiji.Ota@Sun.COM rdsv3_stats_inc(s_send_rdma);
60512198SEiji.Ota@Sun.COM rm->m_rdma_op = op;
60612198SEiji.Ota@Sun.COM return (0);
60712198SEiji.Ota@Sun.COM }
60812198SEiji.Ota@Sun.COM
60912198SEiji.Ota@Sun.COM /*
61012198SEiji.Ota@Sun.COM * The application wants us to pass an RDMA destination (aka MR)
61112198SEiji.Ota@Sun.COM * to the remote
61212198SEiji.Ota@Sun.COM */
61312198SEiji.Ota@Sun.COM int
rdsv3_cmsg_rdma_dest(struct rdsv3_sock * rs,struct rdsv3_message * rm,struct cmsghdr * cmsg)61412198SEiji.Ota@Sun.COM rdsv3_cmsg_rdma_dest(struct rdsv3_sock *rs, struct rdsv3_message *rm,
61512198SEiji.Ota@Sun.COM struct cmsghdr *cmsg)
61612198SEiji.Ota@Sun.COM {
61712198SEiji.Ota@Sun.COM struct rdsv3_mr *mr;
61812198SEiji.Ota@Sun.COM uint32_t r_key;
61912198SEiji.Ota@Sun.COM int err = 0;
62012198SEiji.Ota@Sun.COM
62112966SEiji.Ota@Sun.COM if (cmsg->cmsg_len != CMSG_LEN(sizeof (rds_rdma_cookie_t)) ||
62212198SEiji.Ota@Sun.COM rm->m_rdma_cookie != 0)
62312198SEiji.Ota@Sun.COM return (-EINVAL);
62412198SEiji.Ota@Sun.COM
62512198SEiji.Ota@Sun.COM (void) memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg),
62612198SEiji.Ota@Sun.COM sizeof (rm->m_rdma_cookie));
62712198SEiji.Ota@Sun.COM
62812198SEiji.Ota@Sun.COM /*
62912198SEiji.Ota@Sun.COM * We are reusing a previously mapped MR here. Most likely, the
63012198SEiji.Ota@Sun.COM * application has written to the buffer, so we need to explicitly
63112198SEiji.Ota@Sun.COM * flush those writes to RAM. Otherwise the HCA may not see them
63212198SEiji.Ota@Sun.COM * when doing a DMA from that buffer.
63312198SEiji.Ota@Sun.COM */
63412198SEiji.Ota@Sun.COM r_key = rdsv3_rdma_cookie_key(rm->m_rdma_cookie);
63512198SEiji.Ota@Sun.COM
63612198SEiji.Ota@Sun.COM mutex_enter(&rs->rs_rdma_lock);
63712198SEiji.Ota@Sun.COM mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
63812676SEiji.Ota@Sun.COM if (!mr)
63912198SEiji.Ota@Sun.COM err = -EINVAL; /* invalid r_key */
64012198SEiji.Ota@Sun.COM else
64112198SEiji.Ota@Sun.COM atomic_add_32(&mr->r_refcount, 1);
64212198SEiji.Ota@Sun.COM mutex_exit(&rs->rs_rdma_lock);
64312198SEiji.Ota@Sun.COM
64412198SEiji.Ota@Sun.COM if (mr) {
64512198SEiji.Ota@Sun.COM mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
64612198SEiji.Ota@Sun.COM rm->m_rdma_mr = mr;
64712198SEiji.Ota@Sun.COM }
64812198SEiji.Ota@Sun.COM return (err);
64912198SEiji.Ota@Sun.COM }
65012198SEiji.Ota@Sun.COM
65112198SEiji.Ota@Sun.COM /*
65212198SEiji.Ota@Sun.COM * The application passes us an address range it wants to enable RDMA
65312198SEiji.Ota@Sun.COM * to/from. We map the area, and save the <R_Key,offset> pair
65412198SEiji.Ota@Sun.COM * in rm->m_rdma_cookie. This causes it to be sent along to the peer
65512198SEiji.Ota@Sun.COM * in an extension header.
65612198SEiji.Ota@Sun.COM */
65712198SEiji.Ota@Sun.COM int
rdsv3_cmsg_rdma_map(struct rdsv3_sock * rs,struct rdsv3_message * rm,struct cmsghdr * cmsg)65812198SEiji.Ota@Sun.COM rdsv3_cmsg_rdma_map(struct rdsv3_sock *rs, struct rdsv3_message *rm,
65912198SEiji.Ota@Sun.COM struct cmsghdr *cmsg)
66012198SEiji.Ota@Sun.COM {
66112966SEiji.Ota@Sun.COM /* uint64_t alignment on the buffer */
662*12991SEiji.Ota@Sun.COM uint64_t buf[CEIL(CMSG_LEN(sizeof (struct rds_get_mr_args)),
66312966SEiji.Ota@Sun.COM sizeof (uint64_t))];
66412198SEiji.Ota@Sun.COM int status;
66512198SEiji.Ota@Sun.COM
66612966SEiji.Ota@Sun.COM if (cmsg->cmsg_len != CMSG_LEN(sizeof (struct rds_get_mr_args)) ||
66712198SEiji.Ota@Sun.COM rm->m_rdma_cookie != 0)
66812198SEiji.Ota@Sun.COM return (-EINVAL);
66912198SEiji.Ota@Sun.COM
67012966SEiji.Ota@Sun.COM ASSERT(sizeof (buf) >= cmsg->cmsg_len && ((uintptr_t)buf & 0x7) == 0);
67112966SEiji.Ota@Sun.COM
67212966SEiji.Ota@Sun.COM bcopy(CMSG_DATA(cmsg), (char *)buf, cmsg->cmsg_len);
67312966SEiji.Ota@Sun.COM status = __rdsv3_rdma_map(rs, (struct rds_get_mr_args *)buf,
67412966SEiji.Ota@Sun.COM &rm->m_rdma_cookie, &rm->m_rdma_mr);
67512966SEiji.Ota@Sun.COM
67612198SEiji.Ota@Sun.COM return (status);
67712198SEiji.Ota@Sun.COM }
678