xref: /spdk/test/unit/lib/nvmf/rdma.c/rdma_ut.c (revision dcf0ca15c9189a1e34c35d90fb0721717cea042c)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 #include "spdk_cunit.h"
36 #include "common/lib/test_env.c"
37 #include "nvmf/rdma.c"
38 #include "nvmf/transport.c"
39 
/*
 * Knobs for the spdk_mem_map_translate() stub defined in this file: while
 * g_mr_size is non-zero the stub stores it through its size argument, and
 * after each such call g_mr_size is replaced by g_mr_next_size (when that is
 * non-zero).
 */
uint64_t g_mr_size;
uint64_t g_mr_next_size;
/* Fake memory region; its address is the value the translate stub returns. */
struct ibv_mr g_rdma_mr;

/* Number of io_unit_size units that make up max_io_size in the options below. */
#define RDMA_UT_UNITS_IN_MAX_IO 16

/* Transport options that each test copies into its stack-allocated transport. */
struct spdk_nvmf_transport_opts g_rdma_ut_transport_opts = {
	.max_queue_depth = SPDK_NVMF_RDMA_DEFAULT_MAX_QUEUE_DEPTH,
	.max_qpairs_per_ctrlr = SPDK_NVMF_RDMA_DEFAULT_MAX_QPAIRS_PER_CTRLR,
	.in_capsule_data_size = SPDK_NVMF_RDMA_DEFAULT_IN_CAPSULE_DATA_SIZE,
	.max_io_size = (SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE * RDMA_UT_UNITS_IN_MAX_IO),
	.io_unit_size = SPDK_NVMF_RDMA_MIN_IO_BUFFER_SIZE,
	.max_aq_depth = SPDK_NVMF_RDMA_DEFAULT_AQ_DEPTH,
	.num_shared_buffers = SPDK_NVMF_RDMA_DEFAULT_NUM_SHARED_BUFFERS,
};
55 
SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)

/*
 * Stand-ins for library functions that this test does not implement itself.
 * NOTE(review): DEFINE_STUB / DEFINE_STUB_V are not defined in this file;
 * presumably the trailing DEFINE_STUB argument is the value the generated
 * stub returns and the _V form generates a void function — confirm against
 * the mock header.
 */

/* Memory-map and qpair-disconnect stubs. */
DEFINE_STUB(spdk_mem_map_set_translation, int, (struct spdk_mem_map *map, uint64_t vaddr,
		uint64_t size, uint64_t translation), 0);
DEFINE_STUB(spdk_mem_map_clear_translation, int, (struct spdk_mem_map *map, uint64_t vaddr,
		uint64_t size), 0);
DEFINE_STUB(spdk_mem_map_alloc, struct spdk_mem_map *, (uint64_t default_translation,
		const struct spdk_mem_map_ops *ops, void *cb_ctx), NULL);
DEFINE_STUB(spdk_nvmf_qpair_disconnect, int, (struct spdk_nvmf_qpair *qpair,
		nvmf_qpair_disconnect_cb cb_fn, void *ctx), 0);
DEFINE_STUB_V(spdk_mem_map_free, (struct spdk_mem_map **pmap));

/* Tracing stubs, plus the global the trace code expects to exist. */
struct spdk_trace_histories *g_trace_histories;
DEFINE_STUB_V(spdk_trace_add_register_fn, (struct spdk_trace_register_fn *reg_fn));
DEFINE_STUB_V(spdk_trace_register_object, (uint8_t type, char id_prefix));
DEFINE_STUB_V(spdk_trace_register_description, (const char *name,
		uint16_t tpoint_id, uint8_t owner_type, uint8_t object_type, uint8_t new_object,
		uint8_t arg1_type, const char *arg1_name));
DEFINE_STUB_V(_spdk_trace_record, (uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id,
				   uint32_t size, uint64_t object_id, uint64_t arg1));

/* NVMe-oF request / controller and NVMe transport-id stubs. */
DEFINE_STUB_V(spdk_nvmf_request_exec, (struct spdk_nvmf_request *req));
DEFINE_STUB(spdk_nvme_transport_id_compare, int, (const struct spdk_nvme_transport_id *trid1,
		const struct spdk_nvme_transport_id *trid2), 0);
DEFINE_STUB_V(spdk_nvmf_ctrlr_abort_aer, (struct spdk_nvmf_ctrlr *ctrlr));
DEFINE_STUB(spdk_nvmf_request_get_dif_ctx, bool, (struct spdk_nvmf_request *req,
		struct spdk_dif_ctx *dif_ctx), false);
DEFINE_STUB_V(spdk_nvme_trid_populate_transport, (struct spdk_nvme_transport_id *trid,
		enum spdk_nvme_transport_type trtype));
84 
85 const char *
86 spdk_nvme_transport_id_trtype_str(enum spdk_nvme_transport_type trtype)
87 {
88 	switch (trtype) {
89 	case SPDK_NVME_TRANSPORT_PCIE:
90 		return "PCIe";
91 	case SPDK_NVME_TRANSPORT_RDMA:
92 		return "RDMA";
93 	case SPDK_NVME_TRANSPORT_FC:
94 		return "FC";
95 	default:
96 		return NULL;
97 	}
98 }
99 
100 int
101 spdk_nvme_transport_id_populate_trstring(struct spdk_nvme_transport_id *trid, const char *trstring)
102 {
103 	int len, i;
104 
105 	if (trstring == NULL) {
106 		return -EINVAL;
107 	}
108 
109 	len = strnlen(trstring, SPDK_NVMF_TRSTRING_MAX_LEN);
110 	if (len == SPDK_NVMF_TRSTRING_MAX_LEN) {
111 		return -EINVAL;
112 	}
113 
114 	/* cast official trstring to uppercase version of input. */
115 	for (i = 0; i < len; i++) {
116 		trid->trstring[i] = toupper(trstring[i]);
117 	}
118 	return 0;
119 }
120 
121 uint64_t
122 spdk_mem_map_translate(const struct spdk_mem_map *map, uint64_t vaddr, uint64_t *size)
123 {
124 	if (g_mr_size != 0) {
125 		*(uint32_t *)size = g_mr_size;
126 		if (g_mr_next_size != 0) {
127 			g_mr_size = g_mr_next_size;
128 		}
129 	}
130 
131 	return (uint64_t)&g_rdma_mr;
132 }
133 
134 static void reset_nvmf_rdma_request(struct spdk_nvmf_rdma_request *rdma_req)
135 {
136 	int i;
137 
138 	rdma_req->req.length = 0;
139 	rdma_req->req.data_from_pool = false;
140 	rdma_req->req.data = NULL;
141 	rdma_req->data.wr.num_sge = 0;
142 	rdma_req->data.wr.wr.rdma.remote_addr = 0;
143 	rdma_req->data.wr.wr.rdma.rkey = 0;
144 	memset(&rdma_req->req.dif, 0, sizeof(rdma_req->req.dif));
145 
146 	for (i = 0; i < SPDK_NVMF_MAX_SGL_ENTRIES; i++) {
147 		rdma_req->req.iov[i].iov_base = 0;
148 		rdma_req->req.iov[i].iov_len = 0;
149 		rdma_req->req.buffers[i] = 0;
150 		rdma_req->data.wr.sg_list[i].addr = 0;
151 		rdma_req->data.wr.sg_list[i].length = 0;
152 		rdma_req->data.wr.sg_list[i].lkey = 0;
153 	}
154 	rdma_req->req.iovcnt = 0;
155 }
156 
/*
 * Exercises spdk_nvmf_rdma_request_parse_sgl() against a request, qpair,
 * poller and poll group that live on the stack.
 *
 * spdk_mempool_get() is mocked with MOCK_SET so the buffer addresses handed
 * out are known in advance, and the expected lkey is g_rdma_mr.lkey.
 *
 *   Test 1: single keyed data block, address subtype
 *   Test 2: in-capsule data (unkeyed data block, offset subtype)
 *   Test 3: multi-descriptor SGL (last segment pointing at sgl_desc[])
 *   Test 4: buffers taken from the poll group's buffer cache
 *   Test 5: a buffer whose first translation is too short (split over two
 *           memory regions)
 *
 * reset_nvmf_rdma_request() is called before each case so results from the
 * previous case cannot satisfy the assertions of the next one.
 */
static void
test_spdk_nvmf_rdma_request_parse_sgl(void)
{
	struct spdk_nvmf_rdma_transport rtransport;
	struct spdk_nvmf_rdma_device device;
	struct spdk_nvmf_rdma_request rdma_req = {};
	struct spdk_nvmf_rdma_recv recv;
	struct spdk_nvmf_rdma_poll_group group;
	struct spdk_nvmf_rdma_qpair rqpair;
	struct spdk_nvmf_rdma_poller poller;
	union nvmf_c2h_msg cpl;
	union nvmf_h2c_msg cmd;
	struct spdk_nvme_sgl_descriptor *sgl;
	struct spdk_nvmf_transport_pg_cache_buf bufs[4];
	struct spdk_nvme_sgl_descriptor sgl_desc[SPDK_NVMF_MAX_SGL_ENTRIES] = {{0}};
	struct spdk_nvmf_rdma_request_data data;
	struct spdk_nvmf_transport_pg_cache_buf	buffer;
	struct spdk_nvmf_transport_pg_cache_buf	*buffer_ptr;
	int rc, i;

	/* Wire the stack objects together: group <- poller <- qpair <- request. */
	data.wr.sg_list = data.sgl;
	STAILQ_INIT(&group.group.buf_cache);
	group.group.buf_cache_size = 0;
	group.group.buf_cache_count = 0;
	group.group.transport = &rtransport.transport;
	STAILQ_INIT(&group.retired_bufs);
	poller.group = &group;
	rqpair.poller = &poller;
	rqpair.max_send_sge = SPDK_NVMF_MAX_SGL_ENTRIES;

	/* `sgl` aliases the first SGL descriptor of the command the request points at. */
	sgl = &cmd.nvme_cmd.dptr.sgl1;
	rdma_req.recv = &recv;
	rdma_req.req.cmd = &cmd;
	rdma_req.req.rsp = &cpl;
	rdma_req.data.wr.sg_list = rdma_req.data.sgl;
	rdma_req.req.qpair = &rqpair.qpair;
	rdma_req.req.xfer = SPDK_NVME_DATA_CONTROLLER_TO_HOST;

	/* No real pools: buffers come from the spdk_mempool_get mock. */
	rtransport.transport.opts = g_rdma_ut_transport_opts;
	rtransport.data_wr_pool = NULL;
	rtransport.transport.data_buf_pool = NULL;

	/* Recognisable marker values that the assertions below look for. */
	device.attr.device_cap_flags = 0;
	g_rdma_mr.lkey = 0xABCD;
	sgl->keyed.key = 0xEEEE;
	sgl->address = 0xFFFF;
	rdma_req.recv->buf = (void *)0xDDDD;

	/* Test 1: sgl type: keyed data block subtype: address */
	sgl->generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;

	/* Part 1: simple I/O, one SGL smaller than the transport io unit size */
	MOCK_SET(spdk_mempool_get, (void *)0x2000);
	reset_nvmf_rdma_request(&rdma_req);
	sgl->keyed.length = rtransport.transport.opts.io_unit_size / 2;

	device.map = (void *)0x0;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
	CU_ASSERT(rdma_req.data.wr.num_sge == 1);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);

	/* Part 2: simple I/O, one SGL larger than the transport io unit size (equal to the max io size) */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->keyed.length = rtransport.transport.opts.io_unit_size * RDMA_UT_UNITS_IN_MAX_IO;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * RDMA_UT_UNITS_IN_MAX_IO);
	CU_ASSERT(rdma_req.data.wr.num_sge == RDMA_UT_UNITS_IN_MAX_IO);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	/* Every SGE is one full io unit backed by the mocked 0x2000 buffer. */
	for (i = 0; i < RDMA_UT_UNITS_IN_MAX_IO; i++) {
		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
	}

	/* Part 3: simple I/O one SGL larger than the transport max io size */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->keyed.length = rtransport.transport.opts.max_io_size * 2;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == -1);

	/*
	 * Part 4: Pretend there are no buffer pools. The mempool mock returns
	 * NULL; the call is still expected to return 0, but the request must be
	 * left without any buffers or SGEs.
	 */
	MOCK_SET(spdk_mempool_get, NULL);
	reset_nvmf_rdma_request(&rdma_req);
	sgl->keyed.length = rtransport.transport.opts.io_unit_size * RDMA_UT_UNITS_IN_MAX_IO;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == false);
	CU_ASSERT(rdma_req.req.data == NULL);
	CU_ASSERT(rdma_req.data.wr.num_sge == 0);
	CU_ASSERT(rdma_req.req.buffers[0] == NULL);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == 0);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == 0);

	rdma_req.recv->buf = (void *)0xDDDD;
	/* Test 2: sgl type: unkeyed data block subtype: offset (in capsule data) */
	sgl->generic.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	sgl->unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;

	/* Part 1: Normal I/O exactly as large as the in capsule data size, no offset */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->address = 0;
	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	/* The data pointer is expected to be the receive buffer itself, not a pool buffer. */
	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data == (void *)0xDDDD);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.in_capsule_data_size);
	CU_ASSERT(rdma_req.req.data_from_pool == false);

	/* Part 2: I/O offset + length too large */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->address = rtransport.transport.opts.in_capsule_data_size;
	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == -1);

	/* Part 3: I/O too large */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->address = 0;
	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size * 2;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == -1);

	/*
	 * Test 3: Multi SGL. The command's descriptor becomes a last-segment
	 * descriptor at offset 0, the receive buffer is pointed at sgl_desc[],
	 * and the mempool mock now hands out `data` on every call.
	 */
	sgl->generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
	sgl->unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
	sgl->address = 0;
	rdma_req.recv->buf = (void *)&sgl_desc;
	MOCK_SET(spdk_mempool_get, &data);

	/* part 1: 2 segments each with 1 wr. */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->unkeyed.length = 2 * sizeof(struct spdk_nvme_sgl_descriptor);
	for (i = 0; i < 2; i++) {
		sgl_desc[i].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		sgl_desc[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		sgl_desc[i].keyed.length = rtransport.transport.opts.io_unit_size;
		sgl_desc[i].address = 0x4000 + i * rtransport.transport.opts.io_unit_size;
		sgl_desc[i].keyed.key = 0x44;
	}

	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	/* Expected chain: rdma_req.data.wr -> data.wr -> rdma_req.rsp.wr. */
	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 2);
	CU_ASSERT(rdma_req.data.wr.num_sge == 1);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0x4000);
	CU_ASSERT(rdma_req.data.wr.next == &data.wr);
	CU_ASSERT(data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(data.wr.wr.rdma.remote_addr == 0x4000 + rtransport.transport.opts.io_unit_size);
	CU_ASSERT(data.wr.num_sge == 1);
	CU_ASSERT(data.wr.next == &rdma_req.rsp.wr);

	/* part 2: 2 segments, each with 1 wr containing 8 sge_elements */
	reset_nvmf_rdma_request(&rdma_req);
	sgl->unkeyed.length = 2 * sizeof(struct spdk_nvme_sgl_descriptor);
	for (i = 0; i < 2; i++) {
		sgl_desc[i].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		sgl_desc[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		sgl_desc[i].keyed.length = rtransport.transport.opts.io_unit_size * 8;
		sgl_desc[i].address = 0x4000 + i * 8 * rtransport.transport.opts.io_unit_size;
		sgl_desc[i].keyed.key = 0x44;
	}

	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 16);
	CU_ASSERT(rdma_req.req.iovcnt == 16);
	CU_ASSERT(rdma_req.data.wr.num_sge == 8);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0x4000);
	CU_ASSERT(rdma_req.data.wr.next == &data.wr);
	CU_ASSERT(data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(data.wr.wr.rdma.remote_addr == 0x4000 + rtransport.transport.opts.io_unit_size * 8);
	CU_ASSERT(data.wr.num_sge == 8);
	CU_ASSERT(data.wr.next == &rdma_req.rsp.wr);

	/*
	 * part 3: 2 segments, one very large, one very small.
	 * sgl->unkeyed.length is not set again here; it still holds the
	 * two-descriptor length assigned in part 2.
	 */
	reset_nvmf_rdma_request(&rdma_req);
	for (i = 0; i < 2; i++) {
		sgl_desc[i].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		sgl_desc[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		sgl_desc[i].keyed.key = 0x44;
	}

	/* 15.5 io units followed by 0.5 io unit: 16 units in total. */
	sgl_desc[0].keyed.length = rtransport.transport.opts.io_unit_size * 15 +
				   rtransport.transport.opts.io_unit_size / 2;
	sgl_desc[0].address = 0x4000;
	sgl_desc[1].keyed.length = rtransport.transport.opts.io_unit_size / 2;
	sgl_desc[1].address = 0x4000 + rtransport.transport.opts.io_unit_size * 15 +
			      rtransport.transport.opts.io_unit_size / 2;

	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	/* 17 iovecs are expected: the half unit ending the first WR and the one in the second WR are counted separately. */
	CU_ASSERT(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 16);
	CU_ASSERT(rdma_req.req.iovcnt == 17);
	CU_ASSERT(rdma_req.data.wr.num_sge == 16);
	for (i = 0; i < 15; i++) {
		CU_ASSERT(rdma_req.data.sgl[i].length == rtransport.transport.opts.io_unit_size);
	}
	CU_ASSERT(rdma_req.data.sgl[15].length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0x4000);
	CU_ASSERT(rdma_req.data.wr.next == &data.wr);
	CU_ASSERT(data.wr.wr.rdma.rkey == 0x44);
	CU_ASSERT(data.wr.wr.rdma.remote_addr == 0x4000 + rtransport.transport.opts.io_unit_size * 15 +
		  rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT(data.sgl[0].length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT(data.wr.num_sge == 1);
	CU_ASSERT(data.wr.next == &rdma_req.rsp.wr);

	/* Test 4: use PG buffer cache */
	sgl->generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	sgl->address = 0xFFFF;
	rdma_req.recv->buf = (void *)0xDDDD;
	g_rdma_mr.lkey = 0xABCD;
	sgl->keyed.key = 0xEEEE;

	/* Seed the poll group's cache with the four stack buffers. */
	for (i = 0; i < 4; i++) {
		STAILQ_INSERT_TAIL(&group.group.buf_cache, &bufs[i], link);
	}

	/* part 1: use the four buffers from the pg cache */
	group.group.buf_cache_size = 4;
	group.group.buf_cache_count = 4;
	MOCK_SET(spdk_mempool_get, (void *)0x2000);
	reset_nvmf_rdma_request(&rdma_req);
	sgl->keyed.length = rtransport.transport.opts.io_unit_size * 4;
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	/*
	 * req.buffers[] is expected to hold the raw cache entries, while the
	 * data pointer and SGE addresses are those entries rounded up using
	 * NVMF_DATA_BUFFER_MASK.
	 */
	SPDK_CU_ASSERT_FATAL(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 4);
	CU_ASSERT((uint64_t)rdma_req.req.data == (((uint64_t)&bufs[0] + NVMF_DATA_BUFFER_MASK) &
			~NVMF_DATA_BUFFER_MASK));
	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	CU_ASSERT(group.group.buf_cache_count == 0);
	CU_ASSERT(STAILQ_EMPTY(&group.group.buf_cache));
	for (i = 0; i < 4; i++) {
		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == (uint64_t)&bufs[i]);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (((uint64_t)&bufs[i] + NVMF_DATA_BUFFER_MASK) &
				~NVMF_DATA_BUFFER_MASK));
		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
	}

	/* part 2: now that we have used the buffers from the cache, try again. We should get mempool buffers. */
	reset_nvmf_rdma_request(&rdma_req);
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 4);
	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	CU_ASSERT(group.group.buf_cache_count == 0);
	CU_ASSERT(STAILQ_EMPTY(&group.group.buf_cache));
	for (i = 0; i < 4; i++) {
		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
		CU_ASSERT(group.group.buf_cache_count == 0);
	}

	/* part 3: half and half - two buffers from the cache, two from the mocked mempool */
	group.group.buf_cache_count = 2;

	for (i = 0; i < 2; i++) {
		STAILQ_INSERT_TAIL(&group.group.buf_cache, &bufs[i], link);
	}
	reset_nvmf_rdma_request(&rdma_req);
	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size * 4);
	CU_ASSERT((uint64_t)rdma_req.req.data == (((uint64_t)&bufs[0] + NVMF_DATA_BUFFER_MASK) &
			~NVMF_DATA_BUFFER_MASK));
	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	CU_ASSERT(group.group.buf_cache_count == 0);
	/* First two slots: cache buffers. */
	for (i = 0; i < 2; i++) {
		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == (uint64_t)&bufs[i]);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (((uint64_t)&bufs[i] + NVMF_DATA_BUFFER_MASK) &
				~NVMF_DATA_BUFFER_MASK));
		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
	}
	/* Last two slots: mocked mempool buffers. */
	for (i = 2; i < 4; i++) {
		CU_ASSERT((uint64_t)rdma_req.req.buffers[i] == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000);
		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == rtransport.transport.opts.io_unit_size);
	}

	reset_nvmf_rdma_request(&rdma_req);
	/*
	 * Test 5 dealing with a buffer split over two Memory Regions.
	 * The translate stub reports io_unit_size / 4 on its first call (less
	 * than the io_unit_size / 2 being requested) and io_unit_size / 2 on
	 * later calls. The mempool mock returns `buffer` every time, so the
	 * same address is expected both on group.retired_bufs and in the
	 * request's single SGE.
	 */
	MOCK_SET(spdk_mempool_get, (void *)&buffer);
	sgl->generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	sgl->keyed.length = rtransport.transport.opts.io_unit_size / 2;
	g_mr_size = rtransport.transport.opts.io_unit_size / 4;
	g_mr_next_size = rtransport.transport.opts.io_unit_size / 2;

	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
	SPDK_CU_ASSERT_FATAL(rc == 0);
	CU_ASSERT(rdma_req.req.data_from_pool == true);
	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT((uint64_t)rdma_req.req.data == (((uint64_t)&buffer + NVMF_DATA_BUFFER_MASK) &
			~NVMF_DATA_BUFFER_MASK));
	CU_ASSERT(rdma_req.data.wr.num_sge == 1);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
	CU_ASSERT(rdma_req.req.buffers[0] == &buffer);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == (((uint64_t)&buffer + NVMF_DATA_BUFFER_MASK) &
			~NVMF_DATA_BUFFER_MASK));
	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == rtransport.transport.opts.io_unit_size / 2);
	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
	/* Exactly one retired buffer is expected; unlink it so the list is empty again. */
	buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
	CU_ASSERT(buffer_ptr == &buffer);
	STAILQ_REMOVE(&group.retired_bufs, buffer_ptr, spdk_nvmf_transport_pg_cache_buf, link);
	CU_ASSERT(STAILQ_EMPTY(&group.retired_bufs));
	/* Switch the translate stub's length override back off. */
	g_mr_size = 0;
	g_mr_next_size = 0;

	reset_nvmf_rdma_request(&rdma_req);
}
513 
514 static struct spdk_nvmf_rdma_recv *
515 create_recv(struct spdk_nvmf_rdma_qpair *rqpair, enum spdk_nvme_nvm_opcode opc)
516 {
517 	struct spdk_nvmf_rdma_recv *rdma_recv;
518 	union nvmf_h2c_msg *cmd;
519 	struct spdk_nvme_sgl_descriptor *sgl;
520 
521 	rdma_recv = calloc(1, sizeof(*rdma_recv));
522 	rdma_recv->qpair = rqpair;
523 	cmd = calloc(1, sizeof(*cmd));
524 	rdma_recv->sgl[0].addr = (uintptr_t)cmd;
525 	cmd->nvme_cmd.opc = opc;
526 	sgl = &cmd->nvme_cmd.dptr.sgl1;
527 	sgl->keyed.key = 0xEEEE;
528 	sgl->address = 0xFFFF;
529 	sgl->keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
530 	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
531 	sgl->keyed.length = 1;
532 
533 	return rdma_recv;
534 }
535 
536 static void
537 free_recv(struct spdk_nvmf_rdma_recv *rdma_recv)
538 {
539 	free((void *)rdma_recv->sgl[0].addr);
540 	free(rdma_recv);
541 }
542 
543 static struct spdk_nvmf_rdma_request *
544 create_req(struct spdk_nvmf_rdma_qpair *rqpair,
545 	   struct spdk_nvmf_rdma_recv *rdma_recv)
546 {
547 	struct spdk_nvmf_rdma_request *rdma_req;
548 	union nvmf_c2h_msg *cpl;
549 
550 	rdma_req = calloc(1, sizeof(*rdma_req));
551 	rdma_req->recv = rdma_recv;
552 	rdma_req->req.qpair = &rqpair->qpair;
553 	rdma_req->state = RDMA_REQUEST_STATE_NEW;
554 	rdma_req->data.wr.wr_id = (uintptr_t)&rdma_req->data.rdma_wr;
555 	rdma_req->data.wr.sg_list = rdma_req->data.sgl;
556 	cpl = calloc(1, sizeof(*cpl));
557 	rdma_req->rsp.sgl[0].addr = (uintptr_t)cpl;
558 	rdma_req->req.rsp = cpl;
559 
560 	return rdma_req;
561 }
562 
563 static void
564 free_req(struct spdk_nvmf_rdma_request *rdma_req)
565 {
566 	free((void *)rdma_req->rsp.sgl[0].addr);
567 	free(rdma_req);
568 }
569 
570 static void
571 qpair_reset(struct spdk_nvmf_rdma_qpair *rqpair,
572 	    struct spdk_nvmf_rdma_poller *poller,
573 	    struct spdk_nvmf_rdma_device *device,
574 	    struct spdk_nvmf_rdma_resources *resources)
575 {
576 	memset(rqpair, 0, sizeof(*rqpair));
577 	STAILQ_INIT(&rqpair->pending_rdma_write_queue);
578 	STAILQ_INIT(&rqpair->pending_rdma_read_queue);
579 	rqpair->poller = poller;
580 	rqpair->device = device;
581 	rqpair->resources = resources;
582 	rqpair->qpair.qid = 1;
583 	rqpair->ibv_state = IBV_QPS_RTS;
584 	rqpair->qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
585 	rqpair->max_send_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
586 	rqpair->max_send_depth = 16;
587 	rqpair->max_read_depth = 16;
588 	resources->recvs_to_post.first = resources->recvs_to_post.last = NULL;
589 }
590 
591 static void
592 poller_reset(struct spdk_nvmf_rdma_poller *poller,
593 	     struct spdk_nvmf_rdma_poll_group *group)
594 {
595 	memset(poller, 0, sizeof(*poller));
596 	STAILQ_INIT(&poller->qpairs_pending_recv);
597 	STAILQ_INIT(&poller->qpairs_pending_send);
598 	poller->group = group;
599 }
600 
601 static void
602 test_spdk_nvmf_rdma_request_process(void)
603 {
604 	struct spdk_nvmf_rdma_transport rtransport = {};
605 	struct spdk_nvmf_rdma_poll_group group = {};
606 	struct spdk_nvmf_rdma_poller poller = {};
607 	struct spdk_nvmf_rdma_device device = {};
608 	struct spdk_nvmf_rdma_resources resources = {};
609 	struct spdk_nvmf_rdma_qpair rqpair = {};
610 	struct spdk_nvmf_rdma_recv *rdma_recv;
611 	struct spdk_nvmf_rdma_request *rdma_req;
612 	bool progress;
613 
614 	STAILQ_INIT(&group.group.buf_cache);
615 	STAILQ_INIT(&group.group.pending_buf_queue);
616 	group.group.buf_cache_size = 0;
617 	group.group.buf_cache_count = 0;
618 	poller_reset(&poller, &group);
619 	qpair_reset(&rqpair, &poller, &device, &resources);
620 
621 	rtransport.transport.opts = g_rdma_ut_transport_opts;
622 	rtransport.transport.data_buf_pool = spdk_mempool_create("test_data_pool", 16, 128, 0, 0);
623 	rtransport.data_wr_pool = spdk_mempool_create("test_wr_pool", 128,
624 				  sizeof(struct spdk_nvmf_rdma_request_data),
625 				  0, 0);
626 	MOCK_CLEAR(spdk_mempool_get);
627 
628 	device.attr.device_cap_flags = 0;
629 	device.map = (void *)0x0;
630 	g_rdma_mr.lkey = 0xABCD;
631 
632 	/* Test 1: single SGL READ request */
633 	rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_READ);
634 	rdma_req = create_req(&rqpair, rdma_recv);
635 	rqpair.current_recv_depth = 1;
636 	/* NEW -> EXECUTING */
637 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
638 	CU_ASSERT(progress == true);
639 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_EXECUTING);
640 	CU_ASSERT(rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST);
641 	/* EXECUTED -> TRANSFERRING_C2H */
642 	rdma_req->state = RDMA_REQUEST_STATE_EXECUTED;
643 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
644 	CU_ASSERT(progress == true);
645 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
646 	CU_ASSERT(rdma_req->recv == NULL);
647 	CU_ASSERT(rqpair.sends_to_post.first == &rdma_req->data.wr);
648 	CU_ASSERT(rqpair.sends_to_post.last == &rdma_req->rsp.wr);
649 	CU_ASSERT(resources.recvs_to_post.first == &rdma_recv->wr);
650 	CU_ASSERT(resources.recvs_to_post.last == &rdma_recv->wr);
651 	/* COMPLETED -> FREE */
652 	rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
653 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
654 	CU_ASSERT(progress == true);
655 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_FREE);
656 
657 	free_recv(rdma_recv);
658 	free_req(rdma_req);
659 	poller_reset(&poller, &group);
660 	qpair_reset(&rqpair, &poller, &device, &resources);
661 
662 	/* Test 2: single SGL WRITE request */
663 	rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_WRITE);
664 	rdma_req = create_req(&rqpair, rdma_recv);
665 	rqpair.current_recv_depth = 1;
666 	/* NEW -> TRANSFERRING_H2C */
667 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
668 	CU_ASSERT(progress == true);
669 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
670 	CU_ASSERT(rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
671 	CU_ASSERT(rqpair.sends_to_post.first == &rdma_req->data.wr);
672 	CU_ASSERT(rqpair.sends_to_post.last == &rdma_req->data.wr);
673 	rqpair.sends_to_post.first = rqpair.sends_to_post.last = NULL;
674 	STAILQ_INIT(&poller.qpairs_pending_send);
675 	/* READY_TO_EXECUTE -> EXECUTING */
676 	rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
677 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
678 	CU_ASSERT(progress == true);
679 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_EXECUTING);
680 	/* EXECUTED -> COMPLETING */
681 	rdma_req->state = RDMA_REQUEST_STATE_EXECUTED;
682 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
683 	CU_ASSERT(progress == true);
684 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_COMPLETING);
685 	CU_ASSERT(rdma_req->recv == NULL);
686 	CU_ASSERT(rqpair.sends_to_post.first == &rdma_req->rsp.wr);
687 	CU_ASSERT(rqpair.sends_to_post.last == &rdma_req->rsp.wr);
688 	CU_ASSERT(resources.recvs_to_post.first == &rdma_recv->wr);
689 	CU_ASSERT(resources.recvs_to_post.last == &rdma_recv->wr);
690 	/* COMPLETED -> FREE */
691 	rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
692 	progress = spdk_nvmf_rdma_request_process(&rtransport, rdma_req);
693 	CU_ASSERT(progress == true);
694 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_FREE);
695 
696 	free_recv(rdma_recv);
697 	free_req(rdma_req);
698 	poller_reset(&poller, &group);
699 	qpair_reset(&rqpair, &poller, &device, &resources);
700 
701 	/* Test 3: WRITE+WRITE ibv_send batching */
702 	{
703 		struct spdk_nvmf_rdma_recv *recv1, *recv2;
704 		struct spdk_nvmf_rdma_request *req1, *req2;
705 		recv1 = create_recv(&rqpair, SPDK_NVME_OPC_WRITE);
706 		req1 = create_req(&rqpair, recv1);
707 		recv2 = create_recv(&rqpair, SPDK_NVME_OPC_WRITE);
708 		req2 = create_req(&rqpair, recv2);
709 
710 		/* WRITE 1: NEW -> TRANSFERRING_H2C */
711 		rqpair.current_recv_depth = 1;
712 		spdk_nvmf_rdma_request_process(&rtransport, req1);
713 		CU_ASSERT(req1->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
714 		/* WRITE 1 is the first in batching list */
715 		CU_ASSERT(rqpair.sends_to_post.first == &req1->data.wr);
716 		CU_ASSERT(rqpair.sends_to_post.last == &req1->data.wr);
717 
718 		/* WRITE 2: NEW -> TRANSFERRING_H2C */
719 		rqpair.current_recv_depth = 2;
720 		spdk_nvmf_rdma_request_process(&rtransport, req2);
721 		CU_ASSERT(req2->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
722 		/* WRITE 2 is now also in the batching list */
723 		CU_ASSERT(rqpair.sends_to_post.first->next == &req2->data.wr);
724 		CU_ASSERT(rqpair.sends_to_post.last == &req2->data.wr);
725 
726 		/* Send everything */
727 		rqpair.sends_to_post.first = rqpair.sends_to_post.last = NULL;
728 		STAILQ_INIT(&poller.qpairs_pending_send);
729 
730 		/* WRITE 1 completes before WRITE 2 has finished RDMA reading */
731 		/* WRITE 1: READY_TO_EXECUTE -> EXECUTING */
732 		req1->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
733 		spdk_nvmf_rdma_request_process(&rtransport, req1);
734 		CU_ASSERT(req1->state == RDMA_REQUEST_STATE_EXECUTING);
735 		/* WRITE 1: EXECUTED -> COMPLETING */
736 		req1->state = RDMA_REQUEST_STATE_EXECUTED;
737 		spdk_nvmf_rdma_request_process(&rtransport, req1);
738 		CU_ASSERT(req1->state == RDMA_REQUEST_STATE_COMPLETING);
739 		CU_ASSERT(rqpair.sends_to_post.first == &req1->rsp.wr);
740 		CU_ASSERT(rqpair.sends_to_post.last == &req1->rsp.wr);
741 		rqpair.sends_to_post.first = rqpair.sends_to_post.last = NULL;
742 		STAILQ_INIT(&poller.qpairs_pending_send);
743 		/* WRITE 1: COMPLETED -> FREE */
744 		req1->state = RDMA_REQUEST_STATE_COMPLETED;
745 		spdk_nvmf_rdma_request_process(&rtransport, req1);
746 		CU_ASSERT(req1->state == RDMA_REQUEST_STATE_FREE);
747 
748 		/* Now WRITE 2 has finished reading and completes */
749 		/* WRITE 2: COMPLETED -> FREE */
750 		/* WRITE 2: READY_TO_EXECUTE -> EXECUTING */
751 		req2->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
752 		spdk_nvmf_rdma_request_process(&rtransport, req2);
753 		CU_ASSERT(req2->state == RDMA_REQUEST_STATE_EXECUTING);
754 		/* WRITE 2: EXECUTED -> COMPLETING */
755 		req2->state = RDMA_REQUEST_STATE_EXECUTED;
756 		spdk_nvmf_rdma_request_process(&rtransport, req2);
757 		CU_ASSERT(req2->state == RDMA_REQUEST_STATE_COMPLETING);
758 		CU_ASSERT(rqpair.sends_to_post.first == &req2->rsp.wr);
759 		CU_ASSERT(rqpair.sends_to_post.last == &req2->rsp.wr);
760 		rqpair.sends_to_post.first = rqpair.sends_to_post.last = NULL;
761 		STAILQ_INIT(&poller.qpairs_pending_send);
762 		/* WRITE 2: COMPLETED -> FREE */
763 		req2->state = RDMA_REQUEST_STATE_COMPLETED;
764 		spdk_nvmf_rdma_request_process(&rtransport, req2);
765 		CU_ASSERT(req2->state == RDMA_REQUEST_STATE_FREE);
766 
767 		free_recv(recv1);
768 		free_req(req1);
769 		free_recv(recv2);
770 		free_req(req2);
771 		poller_reset(&poller, &group);
772 		qpair_reset(&rqpair, &poller, &device, &resources);
773 	}
774 
775 	spdk_mempool_free(rtransport.transport.data_buf_pool);
776 	spdk_mempool_free(rtransport.data_wr_pool);
777 }
778 
779 #define TEST_GROUPS_COUNT 5
780 static void
781 test_spdk_nvmf_rdma_get_optimal_poll_group(void)
782 {
783 	struct spdk_nvmf_rdma_transport rtransport = {};
784 	struct spdk_nvmf_transport *transport = &rtransport.transport;
785 	struct spdk_nvmf_rdma_qpair rqpair = {};
786 	struct spdk_nvmf_transport_poll_group *groups[TEST_GROUPS_COUNT];
787 	struct spdk_nvmf_rdma_poll_group *rgroups[TEST_GROUPS_COUNT];
788 	struct spdk_nvmf_transport_poll_group *result;
789 	uint32_t i;
790 
791 	rqpair.qpair.transport = transport;
792 	pthread_mutex_init(&rtransport.lock, NULL);
793 	TAILQ_INIT(&rtransport.poll_groups);
794 
795 	for (i = 0; i < TEST_GROUPS_COUNT; i++) {
796 		groups[i] = spdk_nvmf_rdma_poll_group_create(transport);
797 		CU_ASSERT(groups[i] != NULL);
798 		rgroups[i] = SPDK_CONTAINEROF(groups[i], struct spdk_nvmf_rdma_poll_group, group);
799 		groups[i]->transport = transport;
800 	}
801 	CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[0]);
802 	CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[0]);
803 
804 	/* Emulate connection of %TEST_GROUPS_COUNT% initiators - each creates 1 admin and 1 io qp */
805 	for (i = 0; i < TEST_GROUPS_COUNT; i++) {
806 		rqpair.qpair.qid = 0;
807 		result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
808 		CU_ASSERT(result == groups[i]);
809 		CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[(i + 1) % TEST_GROUPS_COUNT]);
810 		CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[i]);
811 
812 		rqpair.qpair.qid = 1;
813 		result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
814 		CU_ASSERT(result == groups[i]);
815 		CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[(i + 1) % TEST_GROUPS_COUNT]);
816 		CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[(i + 1) % TEST_GROUPS_COUNT]);
817 	}
818 	/* wrap around, admin/io pg point to the first pg
819 	   Destroy all poll groups except of the last one */
820 	for (i = 0; i < TEST_GROUPS_COUNT - 1; i++) {
821 		spdk_nvmf_rdma_poll_group_destroy(groups[i]);
822 		CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[i + 1]);
823 		CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[i + 1]);
824 	}
825 
826 	CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[TEST_GROUPS_COUNT - 1]);
827 	CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[TEST_GROUPS_COUNT - 1]);
828 
829 	/* Check that pointers to the next admin/io poll groups are not changed */
830 	rqpair.qpair.qid = 0;
831 	result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
832 	CU_ASSERT(result == groups[TEST_GROUPS_COUNT - 1]);
833 	CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[TEST_GROUPS_COUNT - 1]);
834 	CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[TEST_GROUPS_COUNT - 1]);
835 
836 	rqpair.qpair.qid = 1;
837 	result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
838 	CU_ASSERT(result == groups[TEST_GROUPS_COUNT - 1]);
839 	CU_ASSERT(rtransport.conn_sched.next_admin_pg == rgroups[TEST_GROUPS_COUNT - 1]);
840 	CU_ASSERT(rtransport.conn_sched.next_io_pg == rgroups[TEST_GROUPS_COUNT - 1]);
841 
842 	/* Remove the last poll group, check that pointers are NULL */
843 	spdk_nvmf_rdma_poll_group_destroy(groups[TEST_GROUPS_COUNT - 1]);
844 	CU_ASSERT(rtransport.conn_sched.next_admin_pg == NULL);
845 	CU_ASSERT(rtransport.conn_sched.next_io_pg == NULL);
846 
847 	/* Request optimal poll group, result must be NULL */
848 	rqpair.qpair.qid = 0;
849 	result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
850 	CU_ASSERT(result == NULL);
851 
852 	rqpair.qpair.qid = 1;
853 	result = spdk_nvmf_rdma_get_optimal_poll_group(&rqpair.qpair);
854 	CU_ASSERT(result == NULL);
855 
856 	pthread_mutex_destroy(&rtransport.lock);
857 }
858 #undef TEST_GROUPS_COUNT
859 
860 static void
861 test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
862 {
863 	struct spdk_nvmf_rdma_transport rtransport;
864 	struct spdk_nvmf_rdma_device device;
865 	struct spdk_nvmf_rdma_request rdma_req = {};
866 	struct spdk_nvmf_rdma_recv recv;
867 	struct spdk_nvmf_rdma_poll_group group;
868 	struct spdk_nvmf_rdma_qpair rqpair;
869 	struct spdk_nvmf_rdma_poller poller;
870 	union nvmf_c2h_msg cpl;
871 	union nvmf_h2c_msg cmd;
872 	struct spdk_nvme_sgl_descriptor *sgl;
873 	struct spdk_nvme_sgl_descriptor sgl_desc[SPDK_NVMF_MAX_SGL_ENTRIES] = {{0}};
874 	struct spdk_nvmf_rdma_request_data data;
875 	struct spdk_nvmf_transport_pg_cache_buf	buffer;
876 	struct spdk_nvmf_transport_pg_cache_buf	*buffer_ptr;
877 	const uint32_t data_bs = 512;
878 	const uint32_t md_size = 8;
879 	int rc, i;
880 	void *aligned_buffer;
881 
882 	data.wr.sg_list = data.sgl;
883 	STAILQ_INIT(&group.group.buf_cache);
884 	group.group.buf_cache_size = 0;
885 	group.group.buf_cache_count = 0;
886 	group.group.transport = &rtransport.transport;
887 	STAILQ_INIT(&group.retired_bufs);
888 	poller.group = &group;
889 	rqpair.poller = &poller;
890 	rqpair.max_send_sge = SPDK_NVMF_MAX_SGL_ENTRIES;
891 
892 	sgl = &cmd.nvme_cmd.dptr.sgl1;
893 	rdma_req.recv = &recv;
894 	rdma_req.req.cmd = &cmd;
895 	rdma_req.req.rsp = &cpl;
896 	rdma_req.data.wr.sg_list = rdma_req.data.sgl;
897 	rdma_req.req.qpair = &rqpair.qpair;
898 	rdma_req.req.xfer = SPDK_NVME_DATA_CONTROLLER_TO_HOST;
899 
900 	rtransport.transport.opts = g_rdma_ut_transport_opts;
901 	rtransport.data_wr_pool = NULL;
902 	rtransport.transport.data_buf_pool = NULL;
903 
904 	device.attr.device_cap_flags = 0;
905 	device.map = NULL;
906 	g_rdma_mr.lkey = 0xABCD;
907 	sgl->keyed.key = 0xEEEE;
908 	sgl->address = 0xFFFF;
909 	rdma_req.recv->buf = (void *)0xDDDD;
910 
911 	/* Test 1: sgl type: keyed data block subtype: address */
912 	sgl->generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
913 	sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
914 
915 	/* Part 1: simple I/O, one SGL smaller than the transport io unit size, block size 512 */
916 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
917 	reset_nvmf_rdma_request(&rdma_req);
918 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
919 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
920 			  0, 0, 0, 0, 0);
921 	rdma_req.req.dif.dif_insert_or_strip = true;
922 	rtransport.transport.opts.io_unit_size = data_bs * 8;
923 	sgl->keyed.length = data_bs * 4;
924 
925 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
926 
927 	CU_ASSERT(rc == 0);
928 	CU_ASSERT(rdma_req.req.data_from_pool == true);
929 	CU_ASSERT(rdma_req.req.length == data_bs * 4);
930 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
931 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 4);
932 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
933 	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
934 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
935 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
936 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
937 
938 	for (i = 0; i < 4; ++i) {
939 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
940 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
941 		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
942 	}
943 
944 	/* Part 2: simple I/O, one SGL equal to io unit size, io_unit_size is not aligned with md_size,
945 		block size 512 */
946 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
947 	reset_nvmf_rdma_request(&rdma_req);
948 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
949 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
950 			  0, 0, 0, 0, 0);
951 	rdma_req.req.dif.dif_insert_or_strip = true;
952 	rtransport.transport.opts.io_unit_size = data_bs * 4;
953 	sgl->keyed.length = data_bs * 4;
954 
955 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
956 
957 	CU_ASSERT(rc == 0);
958 	CU_ASSERT(rdma_req.req.data_from_pool == true);
959 	CU_ASSERT(rdma_req.req.length == data_bs * 4);
960 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
961 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 4);
962 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
963 	CU_ASSERT(rdma_req.data.wr.num_sge == 5);
964 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
965 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
966 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
967 
968 	for (i = 0; i < 3; ++i) {
969 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
970 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
971 		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
972 	}
973 	CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
974 	CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
975 	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
976 
977 	/* 2nd buffer consumed */
978 	CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
979 	CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
980 	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
981 
982 	/* Part 3: simple I/O, one SGL equal io unit size, io_unit_size is equal to block size 512 bytes */
983 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
984 	reset_nvmf_rdma_request(&rdma_req);
985 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
986 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
987 			  0, 0, 0, 0, 0);
988 	rdma_req.req.dif.dif_insert_or_strip = true;
989 	rtransport.transport.opts.io_unit_size = data_bs;
990 	sgl->keyed.length = data_bs;
991 
992 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
993 
994 	CU_ASSERT(rc == 0);
995 	CU_ASSERT(rdma_req.req.data_from_pool == true);
996 	CU_ASSERT(rdma_req.req.length == data_bs);
997 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
998 	CU_ASSERT(rdma_req.req.dif.elba_length == data_bs + md_size);
999 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
1000 	CU_ASSERT(rdma_req.data.wr.num_sge == 1);
1001 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1002 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1003 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
1004 
1005 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
1006 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == data_bs);
1007 	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
1008 
1009 	CU_ASSERT(rdma_req.req.iovcnt == 2);
1010 	CU_ASSERT(rdma_req.req.iov[0].iov_base == (void *)((unsigned long)0x2000));
1011 	CU_ASSERT(rdma_req.req.iov[0].iov_len == data_bs);
1012 	/* 2nd buffer consumed for metadata */
1013 	CU_ASSERT(rdma_req.req.iov[1].iov_base == (void *)((unsigned long)0x2000));
1014 	CU_ASSERT(rdma_req.req.iov[1].iov_len == md_size);
1015 
1016 	/* Part 4: simple I/O, one SGL equal io unit size, io_unit_size is aligned with md_size,
1017 	   block size 512 */
1018 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
1019 	reset_nvmf_rdma_request(&rdma_req);
1020 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1021 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1022 			  0, 0, 0, 0, 0);
1023 	rdma_req.req.dif.dif_insert_or_strip = true;
1024 	rtransport.transport.opts.io_unit_size = (data_bs + md_size) * 4;
1025 	sgl->keyed.length = data_bs * 4;
1026 
1027 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1028 
1029 	CU_ASSERT(rc == 0);
1030 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1031 	CU_ASSERT(rdma_req.req.length == data_bs * 4);
1032 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1033 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 4);
1034 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
1035 	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
1036 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1037 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1038 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
1039 
1040 	for (i = 0; i < 4; ++i) {
1041 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
1042 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
1043 		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
1044 	}
1045 
1046 	/* Part 5: simple I/O, one SGL equal to 2x io unit size, io_unit_size is aligned with md_size,
1047 	   block size 512 */
1048 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
1049 	reset_nvmf_rdma_request(&rdma_req);
1050 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1051 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1052 			  0, 0, 0, 0, 0);
1053 	rdma_req.req.dif.dif_insert_or_strip = true;
1054 	rtransport.transport.opts.io_unit_size = (data_bs + md_size) * 2;
1055 	sgl->keyed.length = data_bs * 4;
1056 
1057 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1058 
1059 	CU_ASSERT(rc == 0);
1060 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1061 	CU_ASSERT(rdma_req.req.length == data_bs * 4);
1062 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1063 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 4);
1064 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
1065 	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
1066 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1067 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1068 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
1069 
1070 	for (i = 0; i < 2; ++i) {
1071 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
1072 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
1073 	}
1074 	for (i = 0; i < 2; ++i) {
1075 		CU_ASSERT(rdma_req.data.wr.sg_list[i + 2].addr == 0x2000 + i * (data_bs + md_size));
1076 		CU_ASSERT(rdma_req.data.wr.sg_list[i + 2].length == data_bs);
1077 	}
1078 
1079 	/* Part 6: simple I/O, one SGL larger than the transport io unit size, io_unit_size is not aligned to md_size,
1080 	   block size 512 */
1081 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
1082 	reset_nvmf_rdma_request(&rdma_req);
1083 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1084 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1085 			  0, 0, 0, 0, 0);
1086 	rdma_req.req.dif.dif_insert_or_strip = true;
1087 	rtransport.transport.opts.io_unit_size = data_bs * 4;
1088 	sgl->keyed.length = data_bs * 6;
1089 
1090 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1091 
1092 	CU_ASSERT(rc == 0);
1093 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1094 	CU_ASSERT(rdma_req.req.length == data_bs * 6);
1095 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1096 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 6);
1097 	CU_ASSERT((uint64_t)rdma_req.req.data == 0x2000);
1098 	CU_ASSERT(rdma_req.data.wr.num_sge == 7);
1099 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1100 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1101 	CU_ASSERT((uint64_t)rdma_req.req.buffers[0] == 0x2000);
1102 
1103 	for (i = 0; i < 3; ++i) {
1104 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == 0x2000 + i * (data_bs + md_size));
1105 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
1106 		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
1107 	}
1108 	CU_ASSERT(rdma_req.data.wr.sg_list[3].addr == 0x2000 + 3 * (data_bs + md_size));
1109 	CU_ASSERT(rdma_req.data.wr.sg_list[3].length == 488);
1110 	CU_ASSERT(rdma_req.data.wr.sg_list[3].lkey == g_rdma_mr.lkey);
1111 
1112 	/* 2nd IO buffer consumed */
1113 	CU_ASSERT(rdma_req.data.wr.sg_list[4].addr == 0x2000);
1114 	CU_ASSERT(rdma_req.data.wr.sg_list[4].length == 24);
1115 	CU_ASSERT(rdma_req.data.wr.sg_list[4].lkey == g_rdma_mr.lkey);
1116 
1117 	CU_ASSERT(rdma_req.data.wr.sg_list[5].addr == 0x2000 + 24 + md_size);
1118 	CU_ASSERT(rdma_req.data.wr.sg_list[5].length == 512);
1119 	CU_ASSERT(rdma_req.data.wr.sg_list[5].lkey == g_rdma_mr.lkey);
1120 
1121 	CU_ASSERT(rdma_req.data.wr.sg_list[6].addr == 0x2000 + 24 + 512 + md_size * 2);
1122 	CU_ASSERT(rdma_req.data.wr.sg_list[6].length == 512);
1123 	CU_ASSERT(rdma_req.data.wr.sg_list[6].lkey == g_rdma_mr.lkey);
1124 
1125 	/* Part 7: simple I/O, number of SGL entries exceeds the number of entries
1126 	   one WR can hold. Additional WR is chained */
1127 	MOCK_SET(spdk_mempool_get, &data);
1128 	aligned_buffer = (void *)((uintptr_t)((char *)&data + NVMF_DATA_BUFFER_MASK) &
1129 				  ~NVMF_DATA_BUFFER_MASK);
1130 	reset_nvmf_rdma_request(&rdma_req);
1131 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1132 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1133 			  0, 0, 0, 0, 0);
1134 	rdma_req.req.dif.dif_insert_or_strip = true;
1135 	rtransport.transport.opts.io_unit_size = data_bs * 16;
1136 	sgl->keyed.length = data_bs * 16;
1137 
1138 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1139 
1140 	CU_ASSERT(rc == 0);
1141 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1142 	CU_ASSERT(rdma_req.req.length == data_bs * 16);
1143 	CU_ASSERT(rdma_req.req.iovcnt == 2);
1144 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1145 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 16);
1146 	CU_ASSERT(rdma_req.req.data == aligned_buffer);
1147 	CU_ASSERT(rdma_req.data.wr.num_sge == 16);
1148 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1149 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1150 	/* additional wr from pool */
1151 	CU_ASSERT(rdma_req.data.wr.next == (void *)&data.wr);
1152 	CU_ASSERT(rdma_req.data.wr.next->num_sge == 1);
1153 	CU_ASSERT(rdma_req.data.wr.next->next == &rdma_req.rsp.wr);
1154 
1155 	/* Part 8: simple I/O, data with metadata do not fit to 1 io_buffer */
1156 	MOCK_SET(spdk_mempool_get, (void *)0x2000);
1157 	reset_nvmf_rdma_request(&rdma_req);
1158 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1159 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1160 			  0, 0, 0, 0, 0);
1161 	rdma_req.req.dif.dif_insert_or_strip = true;
1162 	rtransport.transport.opts.io_unit_size = 516;
1163 	sgl->keyed.length = data_bs * 2;
1164 
1165 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1166 
1167 	CU_ASSERT(rc == 0);
1168 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1169 	CU_ASSERT(rdma_req.req.length == data_bs * 2);
1170 	CU_ASSERT(rdma_req.req.iovcnt == 3);
1171 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1172 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 2);
1173 	CU_ASSERT(rdma_req.req.data == (void *)0x2000);
1174 	CU_ASSERT(rdma_req.data.wr.num_sge == 2);
1175 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1176 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1177 
1178 	CU_ASSERT(rdma_req.data.wr.sg_list[0].addr == 0x2000);
1179 	CU_ASSERT(rdma_req.data.wr.sg_list[0].length == 512);
1180 	CU_ASSERT(rdma_req.data.wr.sg_list[0].lkey == g_rdma_mr.lkey);
1181 
1182 	/* 2nd IO buffer consumed, offset 4 bytes due to part of the metadata
1183 	  is located at the beginning of that buffer */
1184 	CU_ASSERT(rdma_req.data.wr.sg_list[1].addr == 0x2000 + 4);
1185 	CU_ASSERT(rdma_req.data.wr.sg_list[1].length == 512);
1186 	CU_ASSERT(rdma_req.data.wr.sg_list[1].lkey == g_rdma_mr.lkey);
1187 
1188 	/* Test 9 dealing with a buffer split over two Memory Regions */
1189 	MOCK_SET(spdk_mempool_get, (void *)&buffer);
1190 	reset_nvmf_rdma_request(&rdma_req);
1191 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1192 			  SPDK_DIF_TYPE1, SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK,
1193 			  0, 0, 0, 0, 0);
1194 	rdma_req.req.dif.dif_insert_or_strip = true;
1195 	rtransport.transport.opts.io_unit_size = data_bs * 4;
1196 	sgl->keyed.length = data_bs * 2;
1197 	g_mr_size = data_bs;
1198 	g_mr_next_size = rtransport.transport.opts.io_unit_size;
1199 
1200 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1201 	SPDK_CU_ASSERT_FATAL(rc == 0);
1202 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1203 	CU_ASSERT(rdma_req.req.length == rtransport.transport.opts.io_unit_size / 2);
1204 	CU_ASSERT((uint64_t)rdma_req.req.data == (((uint64_t)&buffer + NVMF_DATA_BUFFER_MASK) &
1205 			~NVMF_DATA_BUFFER_MASK));
1206 	CU_ASSERT(rdma_req.data.wr.num_sge == 2);
1207 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0xEEEE);
1208 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0xFFFF);
1209 	CU_ASSERT(rdma_req.req.buffers[0] == &buffer);
1210 	for (i = 0; i < 2; i++) {
1211 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (uint64_t)rdma_req.req.data + i *
1212 			  (data_bs + md_size));
1213 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
1214 		CU_ASSERT(rdma_req.data.wr.sg_list[i].lkey == g_rdma_mr.lkey);
1215 	}
1216 	buffer_ptr = STAILQ_FIRST(&group.retired_bufs);
1217 	CU_ASSERT(buffer_ptr == &buffer);
1218 	STAILQ_REMOVE(&group.retired_bufs, buffer_ptr, spdk_nvmf_transport_pg_cache_buf, link);
1219 	CU_ASSERT(STAILQ_EMPTY(&group.retired_bufs));
1220 	g_mr_size = 0;
1221 	g_mr_next_size = 0;
1222 
1223 	/* Test 2: Multi SGL */
1224 	sgl->generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
1225 	sgl->unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
1226 	sgl->address = 0;
1227 	rdma_req.recv->buf = (void *)&sgl_desc;
1228 	MOCK_SET(spdk_mempool_get, &data);
1229 	aligned_buffer = (void *)((uintptr_t)((char *)&data + NVMF_DATA_BUFFER_MASK) &
1230 				  ~NVMF_DATA_BUFFER_MASK);
1231 
1232 	/* part 1: 2 segments each with 1 wr. io_unit_size is aligned with data_bs + md_size */
1233 	reset_nvmf_rdma_request(&rdma_req);
1234 	spdk_dif_ctx_init(&rdma_req.req.dif.dif_ctx, data_bs + md_size, md_size, true, false,
1235 			  SPDK_DIF_TYPE1,
1236 			  SPDK_DIF_FLAGS_GUARD_CHECK | SPDK_DIF_FLAGS_REFTAG_CHECK, 0, 0, 0, 0, 0);
1237 	rdma_req.req.dif.dif_insert_or_strip = true;
1238 	rtransport.transport.opts.io_unit_size = (data_bs + md_size) * 4;
1239 	sgl->unkeyed.length = 2 * sizeof(struct spdk_nvme_sgl_descriptor);
1240 
1241 	for (i = 0; i < 2; i++) {
1242 		sgl_desc[i].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
1243 		sgl_desc[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
1244 		sgl_desc[i].keyed.length = data_bs * 4;
1245 		sgl_desc[i].address = 0x4000 + i * data_bs * 4;
1246 		sgl_desc[i].keyed.key = 0x44;
1247 	}
1248 
1249 	rc = spdk_nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
1250 
1251 	CU_ASSERT(rc == 0);
1252 	CU_ASSERT(rdma_req.req.data_from_pool == true);
1253 	CU_ASSERT(rdma_req.req.length == data_bs * 4 * 2);
1254 	CU_ASSERT(rdma_req.req.dif.orig_length == rdma_req.req.length);
1255 	CU_ASSERT(rdma_req.req.dif.elba_length == (data_bs + md_size) * 4 * 2);
1256 	CU_ASSERT(rdma_req.data.wr.num_sge == 4);
1257 	for (i = 0; i < 4; ++i) {
1258 		CU_ASSERT(rdma_req.data.wr.sg_list[i].addr == (uintptr_t)((unsigned char *)aligned_buffer) + i *
1259 			  (data_bs + md_size));
1260 		CU_ASSERT(rdma_req.data.wr.sg_list[i].length == data_bs);
1261 	}
1262 
1263 	CU_ASSERT(rdma_req.data.wr.wr.rdma.rkey == 0x44);
1264 	CU_ASSERT(rdma_req.data.wr.wr.rdma.remote_addr == 0x4000);
1265 	CU_ASSERT(rdma_req.data.wr.next == &data.wr);
1266 	CU_ASSERT(data.wr.wr.rdma.rkey == 0x44);
1267 	CU_ASSERT(data.wr.wr.rdma.remote_addr == 0x4000 + data_bs * 4);
1268 	CU_ASSERT(data.wr.num_sge == 4);
1269 	for (i = 0; i < 4; ++i) {
1270 		CU_ASSERT(data.wr.sg_list[i].addr == (uintptr_t)((unsigned char *)aligned_buffer) + i *
1271 			  (data_bs + md_size));
1272 		CU_ASSERT(data.wr.sg_list[i].length == data_bs);
1273 	}
1274 
1275 	CU_ASSERT(data.wr.next == &rdma_req.rsp.wr);
1276 }
1277 
1278 int main(int argc, char **argv)
1279 {
1280 	CU_pSuite	suite = NULL;
1281 	unsigned int	num_failures;
1282 
1283 	CU_set_error_action(CUEA_ABORT);
1284 	CU_initialize_registry();
1285 
1286 	suite = CU_add_suite("nvmf", NULL, NULL);
1287 
1288 	CU_ADD_TEST(suite, test_spdk_nvmf_rdma_request_parse_sgl);
1289 	CU_ADD_TEST(suite, test_spdk_nvmf_rdma_request_process);
1290 	CU_ADD_TEST(suite, test_spdk_nvmf_rdma_get_optimal_poll_group);
1291 	CU_ADD_TEST(suite, test_spdk_nvmf_rdma_request_parse_sgl_with_md);
1292 
1293 	CU_basic_set_mode(CU_BRM_VERBOSE);
1294 	CU_basic_run_tests();
1295 	num_failures = CU_get_number_of_failures();
1296 	CU_cleanup_registry();
1297 	return num_failures;
1298 }
1299