/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_qp.c
 *    Hermon Queue Pair Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, and
 *    querying the Hermon queue pairs.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/hermon/hermon.h>
#include <sys/ib/ib_pkt_hdrs.h>

static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
    hermon_rsrc_t *qpc);
static int hermon_qpn_avl_compare(const void *q, const void *e);
static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
static int hermon_special_qp_rsrc_free(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port);
static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
    uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
    uint_t *logwqesz, uint_t *max_sgl);

/*
 * hermon_qp_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
    uint_t sleepflag)
{
	hermon_rsrc_t *qpc, *rsrc;
	hermon_rsrc_type_t rsrc_type;
	hermon_umap_db_entry_t *umapdb;
	hermon_qphdl_t qp;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_qp_alloc_flags_t alloc_flags;
	ibt_qp_type_t type;
	hermon_qp_wq_type_t swq_type;
	ibtl_qp_hdl_t ibt_qphdl;
	ibt_chan_sizes_t *queuesz_p;
	ib_qpn_t *qpn;
	hermon_qphdl_t *qphdl;
	ibt_mr_attr_t mr_attr;
	hermon_mr_options_t mr_op;
	hermon_srqhdl_t srq;
	hermon_pdhdl_t pd;
	hermon_cqhdl_t sq_cq, rq_cq;
	hermon_mrhdl_t mr;
	uint64_t value, qp_desc_off;
	uint64_t *thewqe, thewqesz;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size;
	uint32_t sq_depth, rq_depth;
	uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
	uint32_t max_sgl, max_recv_sgl, uarpg;
	uint_t qp_is_umap;
	uint_t qp_srq_en, i, j;
	int status, flag;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))

	/*
	 * Extract the necessary info from the hermon_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	ibt_qphdl = qpinfo->qpi_ibt_qphdl;
	queuesz_p = qpinfo->qpi_queueszp;
	qpn = qpinfo->qpi_qpn;
	qphdl = &qpinfo->qpi_qphdl;
	alloc_flags = attr_p->qp_alloc_flags;

	/*
	 * Verify correctness of alloc_flags.
	 *
	 * 1. FEXCH and RSS are only allocated via qp_range.
	 */
	if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
		return (IBT_INVALID_PARAM);
	}
	rsrc_type = HERMON_QPC;
	qp_is_umap = 0;

	/* 2. Make sure only one of these flags is set. */
	switch (alloc_flags &
	    (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
	case IBT_QP_USER_MAP:
		qp_is_umap = 1;
		break;
	case IBT_QP_USES_RFCI:
		if (type != IBT_UD_RQP)
			return (IBT_INVALID_PARAM);

		switch (attr_p->qp_fc.fc_hca_port) {
		case 1:
			rsrc_type = HERMON_QPC_RFCI_PORT1;
			break;
		case 2:
			rsrc_type = HERMON_QPC_RFCI_PORT2;
			break;
		default:
			return (IBT_INVALID_PARAM);
		}
		break;
	case IBT_QP_USES_FCMD:
		if (type != IBT_UD_RQP)
			return (IBT_INVALID_PARAM);
		break;
	case 0:
		break;
	default:
		return (IBT_INVALID_PARAM);	/* conflicting flags set */
	}

	/*
	 * Determine whether QP is being allocated for userland access or
	 * whether it is being allocated for kernel access. If the QP is
	 * being allocated for userland access, then lookup the UAR
	 * page number for the current process. Note: If this is not found
	 * (e.g. if the process has not previously open()'d the Hermon driver),
	 * then an error is returned.
	 */
	if (qp_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
		if (status != DDI_SUCCESS) {
			return (IBT_INVALID_PARAM);
		}
		uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
	} else {
		uarpg = state->hs_kernel_uar_index;
	}

	/*
	 * Determine whether QP is being associated with an SRQ
	 */
	qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
	if (qp_srq_en) {
		/*
		 * Check for valid SRQ handle pointers
		 */
		if (attr_p->qp_ibc_srq_hdl == NULL) {
			status = IBT_SRQ_HDL_INVALID;
			goto qpalloc_fail;
		}
		srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
	}

	/*
	 * Check for valid QP service type (only UD/RC/UC supported)
	 */
	if ((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
	    (type != IBT_UC_RQP)) {
		status = IBT_QP_SRV_TYPE_INVALID;
		goto qpalloc_fail;
	}

	/*
	 * Check for valid PD handle pointer
	 */
	if (attr_p->qp_pd_hdl == NULL) {
		status = IBT_PD_HDL_INVALID;
		goto qpalloc_fail;
	}
	pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;

	/*
	 * If on an SRQ, check to make sure the PD is the same
	 */
	if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
		status = IBT_PD_HDL_INVALID;
		goto qpalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Check for valid CQ handle pointers
	 *
	 * FCMD QPs do not require a receive cq handle.
	 */
	if (attr_p->qp_ibc_scq_hdl == NULL) {
		status = IBT_CQ_HDL_INVALID;
		goto qpalloc_fail1;
	}
	sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
	if (attr_p->qp_ibc_rcq_hdl == NULL) {
		if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
			status = IBT_CQ_HDL_INVALID;
			goto qpalloc_fail1;
		}
		rq_cq = sq_cq;	/* just use the send cq */
	} else
		rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;

	/*
	 * Increment the reference count on the CQs. One or both of these
	 * could return an error if we determine that the given CQ is already
	 * being used with a special (SMI/GSI) QP.
	 */
	status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto qpalloc_fail1;
	}
	status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto qpalloc_fail2;
	}

	/*
	 * Allocate a QP context entry. This will be filled in with all
	 * the necessary parameters to define the Queue Pair. Unlike
	 * other Hermon hardware resources, ownership is not immediately
	 * given to hardware in the final step here. Instead, we must
	 * wait until the QP is later transitioned to the "Init" state before
	 * passing the QP to hardware. If we fail here, we must undo all
	 * the reference counts (CQ and PD).
	 */
	status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail3;
	}

	/*
	 * Allocate the software structure for tracking the queue pair
	 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
	 * undo the reference counts and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail4;
	}
	qp = (hermon_qphdl_t)rsrc->hr_addr;
	bzero(qp, sizeof (struct hermon_sw_qp_s));
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))

	qp->qp_alloc_flags = alloc_flags;

	/*
	 * Calculate the QP number from QPC index. This routine handles
	 * all of the operations necessary to keep track of used, unused,
	 * and released QP numbers.
	 */
	if (type == IBT_UD_RQP) {
		qp->qp_qpnum = qpc->hr_indx;
		qp->qp_ring = qp->qp_qpnum << 8;
		qp->qp_qpn_hdl = NULL;
	} else {
		status = hermon_qp_create_qpn(state, qp, qpc);
		if (status != DDI_SUCCESS) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail5;
		}
	}

	/*
	 * If this will be a user-mappable QP, then allocate an entry for
	 * the "userland resources database". This will later be added to
	 * the database (after all further QP operations are successful).
	 * If we fail here, we must undo the reference counts and the
	 * previous resource allocation.
	 */
	if (qp_is_umap) {
		umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
		    MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail6;
		}
	}

	/*
	 * Allocate the doorbell record. Hermon just needs one for the RQ
	 * (if the QP is not associated with an SRQ); use uarpg (above) as
	 * the UAR index.
	 */

	if (!qp_srq_en) {
		status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
		    &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
		if (status != DDI_SUCCESS) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail6;
		}
	}

	qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);

	/*
	 * We verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits). If not, then obviously the same cleanup needs to be done.
	 */
	if (type == IBT_UD_RQP) {
		max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
		swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
	} else {
		max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
		swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
	}
	max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
	if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
	    (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
		status = IBT_HCA_SGL_EXCEEDED;
		goto qpalloc_fail7;
	}

	/*
	 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
	 * This will depend on the requested number of SGLs. Note: this
	 * has the side-effect of also calculating the real number of SGLs
	 * (for the calculated WQE size).
	 *
	 * For QP's on an SRQ, we set these to 0.
	 */
	if (qp_srq_en) {
		qp->qp_rq_log_wqesz = 0;
		qp->qp_rq_sgl = 0;
	} else {
		hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
		    max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
		    &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
	}
	hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
	    max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);

	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;

	/* NOTE: currently policy in driver, later maybe IBTF interface */
	qp->qp_no_prefetch = 0;

	/*
	 * for prefetching, we need to add the number of wqes in
	 * the 2k area plus one to the number requested, but
	 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
	 * it's exactly TWO wqes for the headroom
	 */
	if (qp->qp_no_prefetch)
		qp->qp_sq_headroom = 2 * sq_wqe_size;
	else
		qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
	/*
	 * hdrm wqes must be integral since both sq_wqe_size &
	 * HERMON_QP_OH_SIZE are power of 2
	 */
	qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
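
	/*
	 * Illustrative numbers only (HERMON_QP_OH_SIZE is the "2k area"
	 * mentioned above): with a 64-byte send WQE stride and a 2KB
	 * overhead area, the headroom is 64 + 2048 = 2112 bytes, so
	 * qp_sq_hdrmwqes = 2112 / 64 = 33 WQEs -- the WQEs in the 2k
	 * area plus one. With prefetch off it would be exactly 2 WQEs.
	 */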

	/*
	 * Calculate the appropriate size for the work queues.
	 * For send queue, add in the headroom wqes to the calculation.
	 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
	 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	/* first, adjust to a minimum and tell the caller the change */
	attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
	    HERMON_QP_MIN_SIZE);
	attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
	    HERMON_QP_MIN_SIZE);
	/*
	 * now, calculate the alloc size, taking into account
	 * the headroom for the sq
	 */
	log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
	/* if the total is a power of two, reduce it */
	if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes) &
	    (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0) {
		log_qp_sq_size = log_qp_sq_size - 1;
	}

	log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
	if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
		log_qp_rq_size = log_qp_rq_size - 1;
	}
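
	/*
	 * highbit() returns the 1-based position of the most significant
	 * set bit, so the log it yields rounds up to the next power of
	 * two except when the value already is one. For example, with
	 * cs_rq = 8 (0b1000), highbit() returns 4, and the power-of-two
	 * test above reduces log_qp_rq_size to 3, i.e. 2^3 = 8 entries.
	 */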

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits). If not,
	 * then obviously we have a lot of cleanup to do before returning.
	 *
	 * NOTE: the first condition deals with the (test) case of cs_sq
	 * being just less than 2^32. In this case, the headroom addition
	 * to the requested cs_sq will pass the test when it should not.
	 * This test no longer lets that case slip through the check.
	 */
	if ((attr_p->qp_sizes.cs_sq >
	    (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
	    (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
	    (!qp_srq_en && (log_qp_rq_size >
	    state->hs_cfg_profile->cp_log_max_qp_sz))) {
		status = IBT_HCA_WR_EXCEEDED;
		goto qpalloc_fail7;
	}

	/*
	 * Allocate the memory for QP work queues. Since Hermon work queues
	 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
	 * the work queue memory is very important. We used to allocate
	 * work queues (the combined receive and send queues) so that they
	 * would be aligned on their combined size. That alignment guaranteed
	 * that they would never cross the 4GB boundary (Hermon work queues
	 * are on the order of MBs at maximum). Now we are able to relax
	 * this alignment constraint by ensuring that the IB address assigned
	 * to the queue memory (as a result of the hermon_mr_register() call)
	 * is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment
	 * that was more restrictive than the system page size.
	 * So we avoided this constraint by passing two alignment values,
	 * one for the memory allocation itself and the other for the DMA
	 * handle (for later bind). This used to cause more memory than
	 * necessary to be allocated (in order to guarantee the more
	 * restrictive alignment constraint). But by guaranteeing the
	 * zero-based IB virtual address for the queue, we are able to
	 * conserve this memory.
	 */
	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
	sq_depth = 1 << log_qp_sq_size;
	sq_size = sq_depth * sq_wqe_size;

	/* QP on SRQ sets these to 0 */
	if (qp_srq_en) {
		rq_wqe_size = 0;
		rq_size = 0;
	} else {
		rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
		rq_depth = 1 << log_qp_rq_size;
		rq_size = rq_depth * rq_wqe_size;
	}

	qp->qp_wqinfo.qa_size = sq_size + rq_size;

	qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
	qp->qp_wqinfo.qa_bind_align = PAGESIZE;

	if (qp_is_umap) {
		qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	}
	status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail7;
	}

	/*
	 * Sort WQs in memory according to stride (*q_wqe_size), largest first
	 * If they are equal, still put the SQ first
	 */
	qp->qp_sq_baseaddr = 0;
	qp->qp_rq_baseaddr = 0;
	if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
		sq_buf = qp->qp_wqinfo.qa_buf_aligned;

		/* if this QP is on an SRQ, set the rq_buf to NULL */
		if (qp_srq_en) {
			rq_buf = NULL;
		} else {
			rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
			qp->qp_rq_baseaddr = sq_size;
		}
	} else {
		rq_buf = qp->qp_wqinfo.qa_buf_aligned;
		sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
		qp->qp_sq_baseaddr = rq_size;
	}

	if (qp_is_umap == 0) {
		qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
		if (qp->qp_sq_wqhdr == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail8;
		}
		if (qp_srq_en) {
			qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
			qp->qp_rq_wqavl.wqa_srq_en = 1;
			qp->qp_rq_wqavl.wqa_srq = srq;
		} else {
			qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
			if (qp->qp_rq_wqhdr == NULL) {
				status = IBT_INSUFF_RESOURCE;
				goto qpalloc_fail8;
			}
			qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
		}
		qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
		qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
		qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
		qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
		qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
	}

	/*
	 * Register the memory for the QP work queues. The memory for the
	 * QP must be registered in the Hermon cMPT tables. This gives us the
	 * LKey to specify in the QP context later. Note: The memory for
	 * Hermon work queues (both Send and Recv) must be contiguous and
	 * registered as a single memory region. Note: If the QP memory is
	 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
	 * meet the alignment restriction, we pass the "mro_bind_override_addr"
	 * flag in the call to hermon_mr_register(). This guarantees that the
	 * resulting IB vaddr will be zero-based (modulo the offset into the
	 * first page). If we fail here, we still have a bunch of resource
	 * and reference count cleanup to do.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
	mr_attr.mr_len = qp->qp_wqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag;
	if (qp_is_umap) {
		mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	} else {
		/* HERMON_QUEUE_LOCATION_NORMAL */
		mr_op.mro_bind_type =
		    state->hs_cfg_profile->cp_iommu_bypass;
	}
	mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
	mr_op.mro_bind_override_addr = 1;
	status = hermon_mr_register(state, pd, &mr_attr, &mr,
	    &mr_op, HERMON_QP_CMPT);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail9;
	}

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space. This will be used when
	 * posting work requests to properly initialize each WQE.
	 */
	qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
	    (uint64_t)mr->mr_bindinfo.bi_addr;
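
	/*
	 * Since "mro_bind_override_addr" forces a zero-based IB virtual
	 * address, bi_addr is essentially just the offset into the first
	 * page. Subtracting qp_desc_off from a WQE's kernel virtual
	 * address therefore yields the IB virtual address that the
	 * hardware expects when work requests are posted.
	 */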

	/*
	 * Fill in all the return arguments (if necessary). This includes
	 * real work queue sizes (in wqes), real SGLs, and QP number
	 */
	if (queuesz_p != NULL) {
		queuesz_p->cs_sq =
		    (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
		queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;

		/* if this QP is on an SRQ, set these to 0 */
		if (qp_srq_en) {
			queuesz_p->cs_rq = 0;
			queuesz_p->cs_rq_sgl = 0;
		} else {
			queuesz_p->cs_rq = (1 << log_qp_rq_size);
			queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
		}
	}
	if (qpn != NULL) {
		*qpn = (ib_qpn_t)qp->qp_qpnum;
	}

	/*
	 * Fill in the rest of the Hermon Queue Pair handle.
	 */
	qp->qp_qpcrsrcp = qpc;
	qp->qp_rsrcp = rsrc;
	qp->qp_state = HERMON_QP_RESET;
	HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
	qp->qp_pdhdl = pd;
	qp->qp_mrhdl = mr;
	qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
	    HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
	qp->qp_is_special = 0;
	qp->qp_uarpg = uarpg;
	qp->qp_umap_dhp = (devmap_cookie_t)NULL;
	qp->qp_sq_cqhdl = sq_cq;
	qp->qp_sq_bufsz = (1 << log_qp_sq_size);
	qp->qp_sq_logqsz = log_qp_sq_size;
	qp->qp_sq_buf = sq_buf;
	qp->qp_desc_off = qp_desc_off;
	qp->qp_rq_cqhdl = rq_cq;
	qp->qp_rq_buf = rq_buf;
	qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) != 0;

	/* if this QP is on an SRQ, set rq_bufsz to 0 */
	if (qp_srq_en) {
		qp->qp_rq_bufsz = 0;
		qp->qp_rq_logqsz = 0;
	} else {
		qp->qp_rq_bufsz = (1 << log_qp_rq_size);
		qp->qp_rq_logqsz = log_qp_rq_size;
	}

	qp->qp_forward_sqd_event = 0;
	qp->qp_sqd_still_draining = 0;
	qp->qp_hdlrarg = (void *)ibt_qphdl;
	qp->qp_mcg_refcnt = 0;

	/*
	 * If this QP is to be associated with an SRQ, set the SRQ handle
	 */
	if (qp_srq_en) {
		qp->qp_srqhdl = srq;
		hermon_srq_refcnt_inc(qp->qp_srqhdl);
	} else {
		qp->qp_srqhdl = NULL;
	}

	/* Determine the QP service type */
	qp->qp_type = type;
	if (type == IBT_RC_RQP) {
		qp->qp_serv_type = HERMON_QP_RC;
	} else if (type == IBT_UD_RQP) {
		if (alloc_flags & IBT_QP_USES_RFCI)
			qp->qp_serv_type = HERMON_QP_RFCI;
		else if (alloc_flags & IBT_QP_USES_FCMD)
			qp->qp_serv_type = HERMON_QP_FCMND;
		else
			qp->qp_serv_type = HERMON_QP_UD;
	} else {
		qp->qp_serv_type = HERMON_QP_UC;
	}

	/*
	 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
	 */

	/*
	 * Initialize the SQ WQEs - all that needs to be done is, every
	 * 64 bytes, to set the first 32 bits to all F's - the high-order
	 * bit is the owner bit (initialized to one) - with the rest
	 * serving the headroom definition for prefetching.
	 */
	wqesz_shift = qp->qp_sq_log_wqesz;
	thewqesz = 1 << wqesz_shift;
	thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
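
	/*
	 * Note that thewqe is a uint64_t pointer, so "thewqe += 8" in the
	 * loop below advances exactly 64 bytes per write; each write stamps
	 * the leading 32 bits (which include the ownership bit) with ones.
	 */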
	if (qp_is_umap == 0) {
		for (i = 0; i < sq_depth; i++) {
			/*
			 * for each stride, go through and every 64 bytes
			 * write the init value - having set the address
			 * once, just keep incrementing it
			 */
			for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
				*(uint32_t *)thewqe = 0xFFFFFFFF;
			}
		}
	}

	/* Zero out the QP context */
	bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));

	/*
	 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
	 * "qphdl" and return success
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);

	/*
	 * If this is a user-mappable QP, then we need to insert the previously
	 * allocated entry into the "userland resources database". This will
	 * allow for later lookup during devmap() (i.e. mmap()) calls.
	 */
	if (qp_is_umap) {
		hermon_umap_db_add(umapdb);
	}
	mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	*qphdl = qp;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
qpalloc_fail9:
	hermon_queue_free(&qp->qp_wqinfo);
qpalloc_fail8:
	if (qp->qp_sq_wqhdr)
		hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
	if (qp->qp_rq_wqhdr)
		hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
qpalloc_fail7:
	if (qp_is_umap) {
		hermon_umap_db_free(umapdb);
	}
	if (!qp_srq_en) {
		hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
	}

qpalloc_fail6:
	/*
	 * Releasing the QPN will also free up the QPC context. Update
	 * the QPC context pointer to indicate this.
	 */
	if (qp->qp_qpn_hdl) {
		hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
		    HERMON_QPN_RELEASE);
	} else {
		hermon_rsrc_free(state, &qpc);
	}
	qpc = NULL;
qpalloc_fail5:
	hermon_rsrc_free(state, &rsrc);
qpalloc_fail4:
	if (qpc) {
		hermon_rsrc_free(state, &qpc);
	}
qpalloc_fail3:
	hermon_cq_refcnt_dec(rq_cq);
qpalloc_fail2:
	hermon_cq_refcnt_dec(sq_cq);
qpalloc_fail1:
	hermon_pd_refcnt_dec(pd);
qpalloc_fail:
	return (status);
}


/*
 * hermon_special_qp_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
    uint_t sleepflag)
{
	hermon_rsrc_t *qpc, *rsrc;
	hermon_qphdl_t qp;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_sqp_type_t type;
	uint8_t port;
	ibtl_qp_hdl_t ibt_qphdl;
	ibt_chan_sizes_t *queuesz_p;
	hermon_qphdl_t *qphdl;
	ibt_mr_attr_t mr_attr;
	hermon_mr_options_t mr_op;
	hermon_pdhdl_t pd;
	hermon_cqhdl_t sq_cq, rq_cq;
	hermon_mrhdl_t mr;
	uint64_t qp_desc_off;
	uint64_t *thewqe, thewqesz;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size, max_sgl;
	uint32_t uarpg;
	uint32_t sq_depth;
	uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
	int status, flag, i, j;

	/*
	 * Extract the necessary info from the hermon_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	port = qpinfo->qpi_port;
	ibt_qphdl = qpinfo->qpi_ibt_qphdl;
	queuesz_p = qpinfo->qpi_queueszp;
	qphdl = &qpinfo->qpi_qphdl;

	/*
	 * Check for valid special QP type (only SMI & GSI supported)
	 */
	if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
		status = IBT_QP_SPECIAL_TYPE_INVALID;
		goto spec_qpalloc_fail;
	}

	/*
	 * Check for valid port number
	 */
	if (!hermon_portnum_is_valid(state, port)) {
		status = IBT_HCA_PORT_INVALID;
		goto spec_qpalloc_fail;
	}
	port = port - 1;
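	/* the decrement above converts the 1-based IBTF port to 0-based */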

	/*
	 * Check for valid PD handle pointer
	 */
	if (attr_p->qp_pd_hdl == NULL) {
		status = IBT_PD_HDL_INVALID;
		goto spec_qpalloc_fail;
	}
	pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;

	/* Increment the reference count on the PD */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Check for valid CQ handle pointers
	 */
	if ((attr_p->qp_ibc_scq_hdl == NULL) ||
	    (attr_p->qp_ibc_rcq_hdl == NULL)) {
		status = IBT_CQ_HDL_INVALID;
		goto spec_qpalloc_fail1;
	}
	sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
	rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;

	/*
	 * Increment the reference count on the CQs. One or both of these
	 * could return an error if we determine that the given CQ is already
	 * being used with a non-special QP (i.e. a normal QP).
	 */
	status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto spec_qpalloc_fail1;
	}
	status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto spec_qpalloc_fail2;
	}

	/*
	 * Allocate the special QP resources. Essentially, this allocation
	 * amounts to checking whether the requested special QP has already
	 * been allocated. If successful, the QP context returned is an
	 * actual QP context that has been "aliased" to act as a special QP
	 * of the appropriate type (and for the appropriate port). Just as
	 * in hermon_qp_alloc() above, ownership for this QP context is not
	 * immediately given to hardware in the final step here. Instead, we
	 * wait until the QP is later transitioned to the "Init" state before
	 * passing the QP to hardware. If we fail here, we must undo all
	 * the reference counts (CQ and PD).
	 */
	status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail3;
	}

	/*
	 * Allocate the software structure for tracking the special queue
	 * pair (i.e. the Hermon Queue Pair handle). If we fail here, we
	 * must undo the reference counts and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail4;
	}
	qp = (hermon_qphdl_t)rsrc->hr_addr;

	bzero(qp, sizeof (struct hermon_sw_qp_s));

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
	qp->qp_alloc_flags = attr_p->qp_alloc_flags;

	/*
	 * Actual QP number is a combination of the index of the QPC and
	 * the port number. This is because the special QP contexts must
	 * be allocated two-at-a-time.
	 */
	qp->qp_qpnum = qpc->hr_indx + port;
	qp->qp_ring = qp->qp_qpnum << 8;

	uarpg = state->hs_kernel_uar_index;	/* must be for spec qp */
	/*
	 * Allocate the doorbell record. Hermon needs only one, for the RQ,
	 * so allocate a QP doorbell, using uarpg (above) as the UAR index.
	 */

	status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
	    &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail5;
	}
	/*
	 * Calculate the appropriate size for the work queues.
	 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
	 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	attr_p->qp_sizes.cs_sq =
	    max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
	attr_p->qp_sizes.cs_rq =
	    max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
	log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
	if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) {
		log_qp_sq_size = log_qp_sq_size - 1;
	}
	log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
	if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
		log_qp_rq_size = log_qp_rq_size - 1;
	}

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits). If not,
	 * then obviously we have a bit of cleanup to do before returning.
	 */
	if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
	    (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
		status = IBT_HCA_WR_EXCEEDED;
		goto spec_qpalloc_fail5a;
	}

	/*
	 * Next we verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits). If not, then obviously the same cleanup needs to be done.
	 */
	max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
	if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
	    (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
		status = IBT_HCA_SGL_EXCEEDED;
		goto spec_qpalloc_fail5a;
	}

	/*
	 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
	 * This will depend on the requested number of SGLs. Note: this
	 * has the side-effect of also calculating the real number of SGLs
	 * (for the calculated WQE size).
	 */
	hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
	    max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
	    &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
	if (type == IBT_SMI_SQP) {
		hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
		    max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
		    &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
	} else {
		hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
		    max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
		    &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
	}

	/*
	 * Allocate the memory for QP work queues. Since Hermon work queues
	 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
	 * the work queue memory is very important. We used to allocate
	 * work queues (the combined receive and send queues) so that they
	 * would be aligned on their combined size. That alignment guaranteed
	 * that they would never cross the 4GB boundary (Hermon work queues
	 * are on the order of MBs at maximum). Now we are able to relax
	 * this alignment constraint by ensuring that the IB address assigned
	 * to the queue memory (as a result of the hermon_mr_register() call)
	 * is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment
	 * that was more restrictive than the system page size.
	 * So we avoided this constraint by passing two alignment values,
	 * one for the memory allocation itself and the other for the DMA
	 * handle (for later bind). This used to cause more memory than
	 * necessary to be allocated (in order to guarantee the more
	 * restrictive alignment constraint). But by guaranteeing the
	 * zero-based IB virtual address for the queue, we are able to
	 * conserve this memory.
	 */
	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
	sq_depth = 1 << log_qp_sq_size;
	sq_size = (1 << log_qp_sq_size) * sq_wqe_size;

	rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
	rq_size = (1 << log_qp_rq_size) * rq_wqe_size;

	qp->qp_wqinfo.qa_size = sq_size + rq_size;

	qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
	qp->qp_wqinfo.qa_bind_align = PAGESIZE;
	qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;

	status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail5a;
	}

	/*
	 * Sort WQs in memory according to stride (*q_wqe_size), largest
	 * first. If they are equal, the Send Queue still goes first
	 */
	qp->qp_sq_baseaddr = 0;
	qp->qp_rq_baseaddr = 0;
	if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
		sq_buf = qp->qp_wqinfo.qa_buf_aligned;
		rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
		qp->qp_rq_baseaddr = sq_size;
	} else {
		rq_buf = qp->qp_wqinfo.qa_buf_aligned;
		sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
		qp->qp_sq_baseaddr = rq_size;
	}

	qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
	if (qp->qp_sq_wqhdr == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail6;
	}
	qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
	if (qp->qp_rq_wqhdr == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail6;
	}
	qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
	qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
	qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
	qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
	qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
	qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;

	/*
	 * Register the memory for the special QP work queues. The memory for
	 * the special QP must be registered in the Hermon cMPT tables. This
	 * gives us the LKey to specify in the QP context later. Note: The
	 * memory for Hermon work queues (both Send and Recv) must be
	 * contiguous and registered as a single memory region. Also, in order
	 * to meet the alignment restriction, we pass the
	 * "mro_bind_override_addr" flag in the call to hermon_mr_register().
	 * This guarantees that the resulting IB vaddr will be zero-based
	 * (modulo the offset into the first page). If we fail here, we have
	 * a bunch of resource and reference count cleanup to do.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
	mr_attr.mr_len = qp->qp_wqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag;

	mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
	mr_op.mro_bind_override_addr = 1;

	status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
	    HERMON_QP_CMPT);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto spec_qpalloc_fail6;
	}

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space. This will be used when
	 * posting work requests to properly initialize each WQE.
	 */
	qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
	    (uint64_t)mr->mr_bindinfo.bi_addr;

	/* set the prefetch - initially, not prefetching */
	qp->qp_no_prefetch = 1;

	if (qp->qp_no_prefetch)
		qp->qp_sq_headroom = 2 * sq_wqe_size;
	else
		qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
	/*
	 * hdrm wqes must be integral since both sq_wqe_size &
	 * HERMON_QP_OH_SIZE are power of 2
	 */
	qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
	/*
	 * Fill in all the return arguments (if necessary). This includes
	 * real work queue sizes, real SGLs, and QP number (which will be
	 * either zero or one, depending on the special QP type)
	 */
	if (queuesz_p != NULL) {
		queuesz_p->cs_sq =
		    (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
		queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
		queuesz_p->cs_rq = (1 << log_qp_rq_size);
		queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
	}

	/*
	 * Fill in the rest of the Hermon Queue Pair handle. We can update
	 * the following fields for use in further operations on the QP.
	 */
	qp->qp_qpcrsrcp = qpc;
	qp->qp_rsrcp = rsrc;
	qp->qp_state = HERMON_QP_RESET;
	HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
	qp->qp_pdhdl = pd;
	qp->qp_mrhdl = mr;
	qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
	    HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
	qp->qp_is_special = (type == IBT_SMI_SQP) ?
	    HERMON_QP_SMI : HERMON_QP_GSI;
	qp->qp_uarpg = uarpg;
	qp->qp_umap_dhp = (devmap_cookie_t)NULL;
	qp->qp_sq_cqhdl = sq_cq;
	qp->qp_sq_bufsz = (1 << log_qp_sq_size);
	qp->qp_sq_buf = sq_buf;
	qp->qp_sq_logqsz = log_qp_sq_size;
	qp->qp_desc_off = qp_desc_off;
	qp->qp_rq_cqhdl = rq_cq;
	qp->qp_rq_bufsz = (1 << log_qp_rq_size);
	qp->qp_rq_buf = rq_buf;
	qp->qp_rq_logqsz = log_qp_rq_size;
	qp->qp_portnum = port;
	qp->qp_pkeyindx = 0;
	qp->qp_forward_sqd_event = 0;
	qp->qp_sqd_still_draining = 0;
	qp->qp_hdlrarg = (void *)ibt_qphdl;
	qp->qp_mcg_refcnt = 0;
	qp->qp_srqhdl = NULL;

	/* All special QPs are UD QP service type */
	qp->qp_type = IBT_UD_RQP;
	qp->qp_serv_type = HERMON_QP_UD;

	/*
	 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
	 */

	/*
	 * Initialize the SQ WQEs - all that needs to be done is, every
	 * 64 bytes, to set the first 32 bits to all F's - the high-order
	 * bit is the owner bit (initialized to one) - with the rest
	 * serving the headroom definition for prefetching.
	 */
	wqesz_shift = qp->qp_sq_log_wqesz;
	thewqesz = 1 << wqesz_shift;
	thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
	for (i = 0; i < sq_depth; i++) {
		/*
		 * for each stride, go through and every 64 bytes write the
		 * init value - having set the address once, just keep
		 * incrementing it
		 */
		for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
			*(uint32_t *)thewqe = 0xFFFFFFFF;
		}
	}

	/* Zero out the QP context */
	bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));

	/*
	 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
	 * "qphdl" and return success
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);

	mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	*qphdl = qp;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
spec_qpalloc_fail6:
	hermon_queue_free(&qp->qp_wqinfo);
	if (qp->qp_sq_wqhdr)
		hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
	if (qp->qp_rq_wqhdr)
		hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
spec_qpalloc_fail5a:
	hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
spec_qpalloc_fail5:
	hermon_rsrc_free(state, &rsrc);
spec_qpalloc_fail4:
	if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to free special QP rsrc");
	}
spec_qpalloc_fail3:
	hermon_cq_refcnt_dec(rq_cq);
spec_qpalloc_fail2:
	hermon_cq_refcnt_dec(sq_cq);
spec_qpalloc_fail1:
	hermon_pd_refcnt_dec(pd);
spec_qpalloc_fail:
	return (status);
}


/*
 * hermon_qp_alloc_range()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
    hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
    ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
    hermon_qphdl_t *qphdl, uint_t sleepflag)
{
	hermon_rsrc_t *qpc, *rsrc;
	hermon_rsrc_type_t rsrc_type;
	hermon_qphdl_t qp;
	hermon_qp_range_t *qp_range_p;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_qp_type_t type;
	hermon_qp_wq_type_t swq_type;
	ibt_chan_sizes_t *queuesz_p;
	ibt_mr_attr_t mr_attr;
	hermon_mr_options_t mr_op;
	hermon_srqhdl_t srq;
	hermon_pdhdl_t pd;
	hermon_cqhdl_t sq_cq, rq_cq;
	hermon_mrhdl_t mr;
	uint64_t qp_desc_off;
	uint64_t *thewqe, thewqesz;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size;
	uint32_t sq_depth, rq_depth;
	uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
	uint32_t max_sgl, max_recv_sgl, uarpg;
	uint_t qp_srq_en, i, j;
	int ii;	/* loop counter for range */
	int status, flag;
	uint_t serv_type;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))

	/*
	 * Extract the necessary info from the hermon_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	queuesz_p = qpinfo->qpi_queueszp;

	if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
		if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
			return (IBT_INSUFF_RESOURCE);
		rsrc_type = HERMON_QPC;
		serv_type = HERMON_QP_UD;
	} else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
		if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
			return (IBT_INSUFF_RESOURCE);
		switch (attr_p->qp_fc.fc_hca_port) {
		case 1:
			rsrc_type = HERMON_QPC_FEXCH_PORT1;
			break;
		case 2:
			rsrc_type = HERMON_QPC_FEXCH_PORT2;
			break;
		default:
			return (IBT_INVALID_PARAM);
		}
		serv_type = HERMON_QP_FEXCH;
	} else
		return (IBT_INVALID_PARAM);

	/*
	 * Determine whether QP is being allocated for userland access or
	 * whether it is being allocated for kernel access. If the QP is
	 * being allocated for userland access, fail (too complex for now).
	 */
	if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
		return (IBT_NOT_SUPPORTED);
	} else {
		uarpg = state->hs_kernel_uar_index;
	}

	/*
	 * Determine whether QP is being associated with an SRQ
	 */
	qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
	if (qp_srq_en) {
		/*
		 * Check for valid SRQ handle pointers
		 */
		if (attr_p->qp_ibc_srq_hdl == NULL) {
			return (IBT_SRQ_HDL_INVALID);
		}
		srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
	}

	/*
	 * Check for valid QP service type (only UD supported)
	 */
	if (type != IBT_UD_RQP) {
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Check for valid PD handle pointer
	 */
	if (attr_p->qp_pd_hdl == NULL) {
		return (IBT_PD_HDL_INVALID);
	}
	pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;

	/*
	 * If on an SRQ, check to make sure the PD is the same
	 */
	if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
		return (IBT_PD_HDL_INVALID);
	}

	/* set loop variable here, for freeing resources on error */
	ii = 0;

	/*
	 * Allocate 2^log2 contiguous/aligned QP context entries. This will
	 * be filled in with all the necessary parameters to define the
	 * Queue Pairs. Unlike other Hermon hardware resources, ownership
	 * is not immediately given to hardware in the final step here.
	 * Instead, we must wait until the QP is later transitioned to the
	 * "Init" state before passing the QP to hardware. If we fail here,
	 * we must undo all the reference counts (CQ and PD).
	 */
	status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
	    &qpc);
	if (status != DDI_SUCCESS) {
		return (IBT_INSUFF_RESOURCE);
	}

	if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
		/*
		 * Need to init the MKEYs for the FEXCH QPs.
		 *
		 * For FEXCH QP subranges, we return the QPN base as
		 * "relative" to the full FEXCH QP range for the port.
		 */
		*(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
		    attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
	else
		*(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;

	qp_range_p = kmem_alloc(sizeof (*qp_range_p),
	    (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
	if (qp_range_p == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail0;
	}
	mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));
	mutex_enter(&qp_range_p->hqpr_lock);
	qp_range_p->hqpr_refcnt = 1 << log2;
	qp_range_p->hqpr_qpcrsrc = qpc;
	mutex_exit(&qp_range_p->hqpr_lock);
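
	/*
	 * A single tracking structure covers the whole contiguous QPC
	 * range: hqpr_refcnt starts at 2^log2 and is presumably dropped
	 * as each QP in the range is freed, so the underlying QPC block
	 * can be released only once the last QP is gone.
	 */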

for_each_qp:

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	rq_cq = (hermon_cqhdl_t)recv_cq[ii];
	sq_cq = (hermon_cqhdl_t)send_cq[ii];
	if (sq_cq == NULL) {
		if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
			/* if no send completions, just use rq_cq */
			sq_cq = rq_cq;
		} else {
			status = IBT_CQ_HDL_INVALID;
			goto qpalloc_fail1;
		}
	}

	/*
	 * Increment the reference count on the CQs. One or both of these
	 * could return an error if we determine that the given CQ is already
	 * being used with a special (SMI/GSI) QP.
	 */
	status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto qpalloc_fail1;
	}
	status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		status = IBT_CQ_HDL_INVALID;
		goto qpalloc_fail2;
	}

	/*
	 * Allocate the software structure for tracking the queue pair
	 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
	 * undo the reference counts and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail4;
	}
	qp = (hermon_qphdl_t)rsrc->hr_addr;
	bzero(qp, sizeof (struct hermon_sw_qp_s));
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
	qp->qp_alloc_flags = attr_p->qp_alloc_flags;

	/*
	 * Calculate the QP number from QPC index. This routine handles
	 * all of the operations necessary to keep track of used, unused,
	 * and released QP numbers.
	 */
	qp->qp_qpnum = qpc->hr_indx + ii;
	qp->qp_ring = qp->qp_qpnum << 8;
	qp->qp_qpn_hdl = NULL;

	/*
	 * Allocate the doorbell record. Hermon just needs one for the RQ
	 * (if the QP is not associated with an SRQ); use uarpg (above) as
	 * the UAR index.
	 */

	if (!qp_srq_en) {
		status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
		    &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
		if (status != DDI_SUCCESS) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail6;
		}
	}

	qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);

	/*
	 * We verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits). If not, then obviously the same cleanup needs to be done.
	 */
	max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
	swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
	max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
	if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
	    (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
		status = IBT_HCA_SGL_EXCEEDED;
		goto qpalloc_fail7;
	}

	/*
	 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
	 * This will depend on the requested number of SGLs. Note: this
	 * has the side-effect of also calculating the real number of SGLs
	 * (for the calculated WQE size).
	 *
	 * For QP's on an SRQ, we set these to 0.
	 */
	if (qp_srq_en) {
		qp->qp_rq_log_wqesz = 0;
		qp->qp_rq_sgl = 0;
	} else {
		hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
		    max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
		    &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
	}
	hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
	    max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);

	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;

	/* NOTE: currently policy in driver, later maybe IBTF interface */
	qp->qp_no_prefetch = 0;

	/*
	 * for prefetching, we need to add the number of wqes in
	 * the 2k area plus one to the number requested, but
	 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
	 * it's exactly TWO wqes for the headroom
	 */
	if (qp->qp_no_prefetch)
		qp->qp_sq_headroom = 2 * sq_wqe_size;
	else
		qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
	/*
	 * hdrm wqes must be integral since both sq_wqe_size &
	 * HERMON_QP_OH_SIZE are power of 2
	 */
	qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);

	/*
	 * Calculate the appropriate size for the work queues.
	 * For send queue, add in the headroom wqes to the calculation.
	 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
	 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	/* first, adjust to a minimum and tell the caller the change */
	attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
	    HERMON_QP_MIN_SIZE);
	attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
	    HERMON_QP_MIN_SIZE);
	/*
	 * now, calculate the alloc size, taking into account
	 * the headroom for the sq
	 */
	log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
	/* if the total is a power of two, reduce it */
	if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes) &
	    (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0) {
		log_qp_sq_size = log_qp_sq_size - 1;
	}

	log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
	if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) {
		log_qp_rq_size = log_qp_rq_size - 1;
	}

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits). If not,
	 * then obviously we have a lot of cleanup to do before returning.
	 *
	 * NOTE: the first condition deals with the (test) case of cs_sq
	 * being just less than 2^32. In this case, the headroom addition
	 * to the requested cs_sq will pass the test when it should not.
	 * This test no longer lets that case slip through the check.
	 */
	if ((attr_p->qp_sizes.cs_sq >
	    (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
	    (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
	    (!qp_srq_en && (log_qp_rq_size >
	    state->hs_cfg_profile->cp_log_max_qp_sz))) {
		status = IBT_HCA_WR_EXCEEDED;
		goto qpalloc_fail7;
	}

	/*
	 * Allocate the memory for QP work queues. Since Hermon work queues
	 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
	 * the work queue memory is very important. We used to allocate
	 * work queues (the combined receive and send queues) so that they
	 * would be aligned on their combined size. That alignment guaranteed
	 * that they would never cross the 4GB boundary (Hermon work queues
	 * are on the order of MBs at maximum). Now we are able to relax
	 * this alignment constraint by ensuring that the IB address assigned
	 * to the queue memory (as a result of the hermon_mr_register() call)
	 * is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment
	 * that was more restrictive than the system page size.
	 * So we avoided this constraint by passing two alignment values,
	 * one for the memory allocation itself and the other for the DMA
	 * handle (for later bind). This used to cause more memory than
	 * necessary to be allocated (in order to guarantee the more
	 * restrictive alignment constraint). But by guaranteeing the
	 * zero-based IB virtual address for the queue, we are able to
	 * conserve this memory.
	 */
	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
	sq_depth = 1 << log_qp_sq_size;
	sq_size = sq_depth * sq_wqe_size;

	/* QP on SRQ sets these to 0 */
	if (qp_srq_en) {
		rq_wqe_size = 0;
		rq_size = 0;
	} else {
		rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
		rq_depth = 1 << log_qp_rq_size;
		rq_size = rq_depth * rq_wqe_size;
	}

	qp->qp_wqinfo.qa_size = sq_size + rq_size;
	qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
	qp->qp_wqinfo.qa_bind_align = PAGESIZE;
	qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail7;
	}

	/*
	 * Sort WQs in memory according to stride (*q_wqe_size), largest first
	 * If they are equal, still put the SQ first
	 */
	qp->qp_sq_baseaddr = 0;
	qp->qp_rq_baseaddr = 0;
	if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
		sq_buf = qp->qp_wqinfo.qa_buf_aligned;

		/* if this QP is on an SRQ, set the rq_buf to NULL */
		if (qp_srq_en) {
			rq_buf = NULL;
		} else {
			rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
			qp->qp_rq_baseaddr = sq_size;
		}
	} else {
		rq_buf = qp->qp_wqinfo.qa_buf_aligned;
		sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
		qp->qp_sq_baseaddr = rq_size;
	}

	qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
	if (qp->qp_sq_wqhdr == NULL) {
		status = IBT_INSUFF_RESOURCE;
		goto qpalloc_fail8;
	}
	if (qp_srq_en) {
		qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
		qp->qp_rq_wqavl.wqa_srq_en = 1;
		qp->qp_rq_wqavl.wqa_srq = srq;
	} else {
		qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
		if (qp->qp_rq_wqhdr == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto qpalloc_fail8;
		}
		qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
	}
	qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
	qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
	qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
	qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
	qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;

	/*
	 * Register the memory for the QP work queues. The memory for the
	 * QP must be registered in the Hermon cMPT tables. This gives us the
	 * LKey to specify in the QP context later. Note: The memory for
	 * Hermon work queues (both Send and Recv) must be contiguous and
	 * registered as a single memory region. Note: If the QP memory is
	 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
	 * meet the alignment restriction, we pass the "mro_bind_override_addr"
	 * flag in the call to hermon_mr_register(). This guarantees that the
	 * resulting IB vaddr will be zero-based (modulo the offset into the
1655 * first page). If we fail here, we still have a fair amount of
1656 * resource and reference count cleanup to do.
1657 */
1658 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1659 IBT_MR_NOSLEEP;
1660 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1661 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
1662 mr_attr.mr_as = NULL;
1663 mr_attr.mr_flags = flag;
1664 /* HERMON_QUEUE_LOCATION_NORMAL */
1665 mr_op.mro_bind_type =
1666 state->hs_cfg_profile->cp_iommu_bypass;
1667 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1668 mr_op.mro_bind_override_addr = 1;
1669 status = hermon_mr_register(state, pd, &mr_attr, &mr,
1670 &mr_op, HERMON_QP_CMPT);
1671 if (status != DDI_SUCCESS) {
1672 status = IBT_INSUFF_RESOURCE;
1673 goto qpalloc_fail9;
1674 }
1675
1676 /*
1677 * Calculate the offset between the kernel virtual address space
1678 * and the IB virtual address space. This will be used when
1679 * posting work requests to properly initialize each WQE.
1680 */
1681 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1682 (uint64_t)mr->mr_bindinfo.bi_addr;
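/*
 * Editor's illustrative sketch (not driver code; the addresses are
 * made up): with a zero-based IB virtual address, the qp_desc_off
 * computed above converts any WQE's kernel VA into the IB VA that is
 * written into descriptors when posting work requests.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t kva = 0xffffff0123456000ULL;	/* qa_buf_aligned */
	uint64_t ib_vaddr = 0x0ULL;		/* bi_addr, zero-based */
	uint64_t desc_off = kva - ib_vaddr;
	uint64_t wqe_kva = kva + 0x140;		/* some WQE in the queue */

	printf("WQE IB address = 0x%llx\n",
	    (unsigned long long)(wqe_kva - desc_off));
	return (0);
}
#endif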
1683
1684 /*
1685 * Fill in all the return arguments (if necessary). This includes
1686 * real work queue sizes (in wqes), real SGLs, and QP number
1687 */
1688 if (queuesz_p != NULL) {
1689 queuesz_p->cs_sq =
1690 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1691 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1692
1693 /* if this QP is on an SRQ, set these to 0 */
1694 if (qp_srq_en) {
1695 queuesz_p->cs_rq = 0;
1696 queuesz_p->cs_rq_sgl = 0;
1697 } else {
1698 queuesz_p->cs_rq = (1 << log_qp_rq_size);
1699 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1700 }
1701 }
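/*
 * Editor's illustrative sketch (not driver code; the numbers are
 * hypothetical): the usable SQ depth reported above is the allocated
 * power of two minus the "headroom" WQEs reserved for HW prefetch.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned log_qp_sq_size = 8, sq_hdrmwqes = 2;

	printf("usable SQ depth: %u of %u\n",
	    (1U << log_qp_sq_size) - sq_hdrmwqes, 1U << log_qp_sq_size);
	return (0);
}
#endif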
1702
1703 /*
1704 * Fill in the rest of the Hermon Queue Pair handle.
1705 */
1706 qp->qp_qpcrsrcp = NULL;
1707 qp->qp_rsrcp = rsrc;
1708 qp->qp_state = HERMON_QP_RESET;
1709 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1710 qp->qp_pdhdl = pd;
1711 qp->qp_mrhdl = mr;
1712 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1713 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1714 qp->qp_is_special = 0;
1715 qp->qp_uarpg = uarpg;
1716 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1717 qp->qp_sq_cqhdl = sq_cq;
1718 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
1719 qp->qp_sq_logqsz = log_qp_sq_size;
1720 qp->qp_sq_buf = sq_buf;
1721 qp->qp_desc_off = qp_desc_off;
1722 qp->qp_rq_cqhdl = rq_cq;
1723 qp->qp_rq_buf = rq_buf;
1724 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1725 0;
1726
1727 /* if this QP is on an SRQ, set rq_bufsz to 0 */
1728 if (qp_srq_en) {
1729 qp->qp_rq_bufsz = 0;
1730 qp->qp_rq_logqsz = 0;
1731 } else {
1732 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
1733 qp->qp_rq_logqsz = log_qp_rq_size;
1734 }
1735
1736 qp->qp_forward_sqd_event = 0;
1737 qp->qp_sqd_still_draining = 0;
1738 qp->qp_hdlrarg = (void *)ibt_qphdl[ii];
1739 qp->qp_mcg_refcnt = 0;
1740
1741 /*
1742 * If this QP is to be associated with an SRQ, set the SRQ handle
1743 */
1744 if (qp_srq_en) {
1745 qp->qp_srqhdl = srq;
1746 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1747 } else {
1748 qp->qp_srqhdl = NULL;
1749 }
1750
1751 qp->qp_type = IBT_UD_RQP;
1752 qp->qp_serv_type = serv_type;
1753
1754 /*
1755 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1756 */
1757
1758 /*
1759 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1760 * set the quadword to all F's - high-order bit is owner (init to one)
1761 * and the rest for the headroom definition of prefetching.
1762 */
1763 if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1764 wqesz_shift = qp->qp_sq_log_wqesz;
1765 thewqesz = 1 << wqesz_shift;
1766 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1767 for (i = 0; i < sq_depth; i++) {
1768 /*
1769 * for each stride, go through and every 64 bytes
1770 * write the init value - having set the address
1771 * once, just keep incrementing it
1772 */
1773 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1774 *(uint32_t *)thewqe = 0xFFFFFFFF;
1775 }
1776 }
1777 }
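/*
 * Editor's illustrative sketch (not driver code): a standalone model
 * of the initialization loop above.  The first 32 bits of every
 * 64-byte chunk of each SQ stride are stamped with all F's, setting
 * the HW owner bit and the headroom/prefetch pattern.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t sq[(2 * 128) / 8] = { 0 };	/* 2 WQEs, 128B stride */
	uint64_t *wqe = sq;
	size_t depth = 2, wqesz = 128, i, j;

	for (i = 0; i < depth; i++)
		for (j = 0; j < wqesz; j += 64, wqe += 8)
			*(uint32_t *)wqe = 0xFFFFFFFF;

	printf("first word: 0x%x\n", *(uint32_t *)sq);
	return (0);
}
#endif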
1778
1779 /* Zero out the QP context */
1780 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1781
1782 /*
1783 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
1784 * "qphdl" and return success
1785 */
1786 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1787
1788 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1789 DDI_INTR_PRI(state->hs_intrmsi_pri));
1790
1791 qp->qp_rangep = qp_range_p;
1792
1793 qphdl[ii] = qp;
1794
1795 if (++ii < (1 << log2))
1796 goto for_each_qp;
1797
1798 return (DDI_SUCCESS);
1799
1800 /*
1801 * The following is cleanup for all possible failure cases in this routine
1802 */
1803 qpalloc_fail9:
1804 hermon_queue_free(&qp->qp_wqinfo);
1805 qpalloc_fail8:
1806 if (qp->qp_sq_wqhdr)
1807 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1808 if (qp->qp_rq_wqhdr)
1809 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1810 qpalloc_fail7:
1811 if (!qp_srq_en) {
1812 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1813 }
1814
1815 qpalloc_fail6:
1816 hermon_rsrc_free(state, &rsrc);
1817 qpalloc_fail4:
1818 hermon_cq_refcnt_dec(rq_cq);
1819 qpalloc_fail2:
1820 hermon_cq_refcnt_dec(sq_cq);
1821 qpalloc_fail1:
1822 hermon_pd_refcnt_dec(pd);
1823 qpalloc_fail0:
1824 if (ii == 0) {
1825 if (qp_range_p)
1826 kmem_free(qp_range_p, sizeof (*qp_range_p));
1827 hermon_rsrc_free(state, &qpc);
1828 } else {
1829 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1830
1831 mutex_enter(&qp->qp_rangep->hqpr_lock);
1832 qp_range_p->hqpr_refcnt = ii;
1833 mutex_exit(&qp->qp_rangep->hqpr_lock);
1834 while (--ii >= 0) {
1835 ibc_qpn_hdl_t qpn_hdl;
1836 int free_status;
1837
1838 free_status = hermon_qp_free(state, &qphdl[ii],
1839 IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1840 if (free_status != DDI_SUCCESS)
1841 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1842 "error status %x during free",
1843 status, free_status);
1844 }
1845 }
1846
1847 return (status);
1848 }
1849
1850
1851 /*
1852 * hermon_qp_free()
1853 * This function frees up the QP resources. Depending on the value
1854 * of the "free_qp_flags", the QP number may not be released until
1855 * a subsequent call to hermon_qp_release_qpn().
1856 *
1857 * Context: Can be called only from user or kernel context.
1858 */
1859 /* ARGSUSED */
1860 int
1861 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1862 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1863 uint_t sleepflag)
1864 {
1865 hermon_rsrc_t *qpc, *rsrc;
1866 hermon_umap_db_entry_t *umapdb;
1867 hermon_qpn_entry_t *entry;
1868 hermon_pdhdl_t pd;
1869 hermon_mrhdl_t mr;
1870 hermon_cqhdl_t sq_cq, rq_cq;
1871 hermon_srqhdl_t srq;
1872 hermon_qphdl_t qp;
1873 uint64_t value;
1874 uint_t type, port;
1875 uint_t maxprot;
1876 uint_t qp_srq_en;
1877 int status;
1878
1879 /*
1880 * Pull all the necessary information from the Hermon Queue Pair
1881 * handle. This is necessary here because the resource for the
1882 * QP handle is going to be freed up as part of this operation.
1883 */
1884 qp = *qphdl;
1885 mutex_enter(&qp->qp_lock);
1886 qpc = qp->qp_qpcrsrcp; /* NULL if part of a "range" */
1887 rsrc = qp->qp_rsrcp;
1888 pd = qp->qp_pdhdl;
1889 srq = qp->qp_srqhdl;
1890 mr = qp->qp_mrhdl;
1891 rq_cq = qp->qp_rq_cqhdl;
1892 sq_cq = qp->qp_sq_cqhdl;
1893 port = qp->qp_portnum;
1894 qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1895
1896 /*
1897 * If the QP is part of an MCG, then we fail the qp_free
1898 */
1899 if (qp->qp_mcg_refcnt != 0) {
1900 mutex_exit(&qp->qp_lock);
1901 status = ibc_get_ci_failure(0);
1902 goto qpfree_fail;
1903 }
1904
1905 /*
1906 * If the QP is not already in "Reset" state, then transition to
1907 * "Reset". This is necessary because software does not reclaim
1908 * ownership of the QP context until the QP is in the "Reset" state.
1909 * If the ownership transfer fails for any reason, then it is an
1910 * indication that something (either in HW or SW) has gone seriously
1911 * wrong. So we print a warning message and return.
1912 */
1913 if (qp->qp_state != HERMON_QP_RESET) {
1914 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1915 mutex_exit(&qp->qp_lock);
1916 HERMON_WARNING(state, "failed to reset QP context");
1917 status = ibc_get_ci_failure(0);
1918 goto qpfree_fail;
1919 }
1920 qp->qp_state = HERMON_QP_RESET;
1921 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1922
1923 /*
1924 * Do any additional handling necessary for the transition
1925 * to the "Reset" state (e.g. update the WRID lists)
1926 */
1927 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1928 mutex_exit(&qp->qp_lock);
1929 HERMON_WARNING(state, "failed to reset QP WRID list");
1930 status = ibc_get_ci_failure(0);
1931 goto qpfree_fail;
1932 }
1933 }
1934
1935 /*
1936 * If this was a user-mappable QP, then we need to remove its entry
1937 * from the "userland resources database". If it is also currently
1938 * mmap()'d out to a user process, then we need to call
1939 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1940 * We also need to invalidate the QP tracking information for the
1941 * user mapping.
1942 */
1943 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1944 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1945 MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1946 &umapdb);
1947 if (status != DDI_SUCCESS) {
1948 mutex_exit(&qp->qp_lock);
1949 HERMON_WARNING(state, "failed to find in database");
1950 return (ibc_get_ci_failure(0));
1951 }
1952 hermon_umap_db_free(umapdb);
1953 if (qp->qp_umap_dhp != NULL) {
1954 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1955 status = devmap_devmem_remap(qp->qp_umap_dhp,
1956 state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1957 maxprot, DEVMAP_MAPPING_INVALID, NULL);
1958 if (status != DDI_SUCCESS) {
1959 mutex_exit(&qp->qp_lock);
1960 HERMON_WARNING(state, "failed in QP memory "
1961 "devmap_devmem_remap()");
1962 return (ibc_get_ci_failure(0));
1963 }
1964 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1965 }
1966 }
1967
1968
1969 /*
1970 * Put NULL into the Hermon QPNum-to-QPHdl list. This will allow any
1971 * in-progress events to detect that the QP corresponding to this
1972 * number has been freed. Note: it does depend on whether we are
1973 * freeing a special QP or not.
1974 */
1975 if (qpc == NULL) {
1976 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1977 qp->qp_qpnum, NULL);
1978 } else if (qp->qp_is_special) {
1979 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1980 qpc->hr_indx + port, NULL);
1981 } else {
1982 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1983 qpc->hr_indx, NULL);
1984 }
1985
1986 /*
1987 * Drop the QP lock
1988 * At this point the lock is no longer necessary. We cannot
1989 * protect from multiple simultaneous calls to free the same QP.
1990 * In addition, since the QP lock is contained in the QP "software
1991 * handle" resource, which we will free (see below), it is
1992 * important that we have no further references to that memory.
1993 */
1994 mutex_exit(&qp->qp_lock);
1995 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1996
1997 /*
1998 * Free the QP resources
1999 * Start by deregistering and freeing the memory for work queues.
2000 * Next free any previously allocated context information
2001 * (depending on QP type)
2002 * Finally, decrement the necessary reference counts.
2003 * If this fails for any reason, then it is an indication that
2004 * something (either in HW or SW) has gone seriously wrong. So we
2005 * print a warning message and return.
2006 */
2007 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
2008 sleepflag);
2009 if (status != DDI_SUCCESS) {
2010 HERMON_WARNING(state, "failed to deregister QP memory");
2011 status = ibc_get_ci_failure(0);
2012 goto qpfree_fail;
2013 }
2014
2015 /* Free the memory for the QP */
2016 hermon_queue_free(&qp->qp_wqinfo);
2017
2018 if (qp->qp_sq_wqhdr)
2019 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2020 if (qp->qp_rq_wqhdr)
2021 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2022
2023 /* Free the dbr */
2024 if (!qp_srq_en) {
2025 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2026 }
2027
2028 /*
2029 * Free up the remainder of the QP resources. Note: we have a few
2030 * different resources to free up depending on whether the QP is a
2031 * special QP or not. As described above, if any of these fail for
2032 * any reason it is an indication that something (either in HW or SW)
2033 * has gone seriously wrong. So we print a warning message and
2034 * return.
2035 */
2036 if (qp->qp_is_special) {
2037 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2038 IBT_SMI_SQP : IBT_GSI_SQP;
2039
2040 /* Free up resources for the special QP */
2041 status = hermon_special_qp_rsrc_free(state, type, port);
2042 if (status != DDI_SUCCESS) {
2043 HERMON_WARNING(state, "failed to free special QP rsrc");
2044 status = ibc_get_ci_failure(0);
2045 goto qpfree_fail;
2046 }
2047
2048 } else if (qp->qp_rangep) {
2049 int refcnt;
2050 mutex_enter(&qp->qp_rangep->hqpr_lock);
2051 refcnt = --qp->qp_rangep->hqpr_refcnt;
2052 mutex_exit(&qp->qp_rangep->hqpr_lock);
2053 if (refcnt == 0) {
2054 mutex_destroy(&qp->qp_rangep->hqpr_lock);
2055 hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2056 kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2057 }
2058 qp->qp_rangep = NULL;
2059 } else if (qp->qp_qpn_hdl == NULL) {
2060 hermon_rsrc_free(state, &qpc);
2061 } else {
2062 /*
2063 * Check the flags and determine whether to release the
2064 * QPN or not, based on their value.
2065 */
2066 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2067 entry = qp->qp_qpn_hdl;
2068 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2069 HERMON_QPN_FREE_ONLY);
2070 *qpnh = (ibc_qpn_hdl_t)entry;
2071 } else {
2072 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2073 HERMON_QPN_RELEASE);
2074 }
2075 }
2076
2077 mutex_destroy(&qp->qp_sq_lock);
2078
2079 /* Free the Hermon Queue Pair handle */
2080 hermon_rsrc_free(state, &rsrc);
2081
2082 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2083 hermon_cq_refcnt_dec(rq_cq);
2084 hermon_cq_refcnt_dec(sq_cq);
2085 hermon_pd_refcnt_dec(pd);
2086 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2087 hermon_srq_refcnt_dec(srq);
2088 }
2089
2090 /* Set the qphdl pointer to NULL and return success */
2091 *qphdl = NULL;
2092
2093 return (DDI_SUCCESS);
2094
2095 qpfree_fail:
2096 return (status);
2097 }
2098
2099
2100 /*
2101 * hermon_qp_query()
2102 * Context: Can be called from interrupt or base context.
2103 */
2104 int
2105 hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
2106 ibt_qp_query_attr_t *attr_p)
2107 {
2108 ibt_cep_state_t qp_state;
2109 ibt_qp_ud_attr_t *ud;
2110 ibt_qp_rc_attr_t *rc;
2111 ibt_qp_uc_attr_t *uc;
2112 ibt_cep_flags_t enable_flags;
2113 hermon_hw_addr_path_t *qpc_path, *qpc_alt_path;
2114 ibt_cep_path_t *path_ptr, *alt_path_ptr;
2115 hermon_hw_qpc_t *qpc;
2116 int status;
2117 uint_t tmp_sched_q, tmp_alt_sched_q;
2118
2119 mutex_enter(&qp->qp_lock);
2120
2121 /*
2122 * Grab the temporary QPC entry from QP software state
2123 */
2124 qpc = &qp->qpc;
2125
2126 /* Convert the current Hermon QP state to IBTF QP state */
2127 switch (qp->qp_state) {
2128 case HERMON_QP_RESET:
2129 qp_state = IBT_STATE_RESET; /* "Reset" */
2130 break;
2131 case HERMON_QP_INIT:
2132 qp_state = IBT_STATE_INIT; /* Initialized */
2133 break;
2134 case HERMON_QP_RTR:
2135 qp_state = IBT_STATE_RTR; /* Ready to Receive */
2136 break;
2137 case HERMON_QP_RTS:
2138 qp_state = IBT_STATE_RTS; /* Ready to Send */
2139 break;
2140 case HERMON_QP_SQERR:
2141 qp_state = IBT_STATE_SQE; /* Send Queue Error */
2142 break;
2143 case HERMON_QP_SQD:
2144 if (qp->qp_sqd_still_draining) {
2145 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */
2146 } else {
2147 qp_state = IBT_STATE_SQD; /* SQ Drained */
2148 }
2149 break;
2150 case HERMON_QP_ERR:
2151 qp_state = IBT_STATE_ERROR; /* Error */
2152 break;
2153 default:
2154 mutex_exit(&qp->qp_lock);
2155 return (ibc_get_ci_failure(0));
2156 }
2157 attr_p->qp_info.qp_state = qp_state;
2158
2159 /* SRQ Hook. */
2160 attr_p->qp_srq = NULL;
2161
2162 /*
2163 * The following QP information is always returned, regardless of
2164 * the current QP state. Note: Some special handling is necessary
2165 * for reporting the QP number of the special QPs (QP0 and QP1).
2166 */
2167 attr_p->qp_sq_cq =
2168 (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
2169 attr_p->qp_rq_cq =
2170 (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
2171 if (qp->qp_is_special) {
2172 attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
2173 } else {
2174 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
2175 }
2176 attr_p->qp_sq_sgl = qp->qp_sq_sgl;
2177 attr_p->qp_rq_sgl = qp->qp_rq_sgl;
2178 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
2179 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
2180
2181 /*
2182 * If QP is currently in the "Reset" state, then only the above are
2183 * returned
2184 */
2185 if (qp_state == IBT_STATE_RESET) {
2186 mutex_exit(&qp->qp_lock);
2187 return (DDI_SUCCESS);
2188 }
2189
2190 /*
2191 * Post QUERY_QP command to firmware
2192 *
2193 * We use HERMON_CMD_NOSLEEP_SPIN here because we are holding the "qp_lock".
2194 * Since we may be in the interrupt context (or subsequently raised
2195 * to interrupt level by priority inversion), we do not want to block
2196 * in this routine waiting for success.
2197 */
2198 tmp_sched_q = qpc->pri_addr_path.sched_q;
2199 tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
2200 status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
2201 qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
2202 if (status != HERMON_CMD_SUCCESS) {
2203 mutex_exit(&qp->qp_lock);
2204 cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
2205 "command failed: %08x\n", state->hs_instance, status);
2206 if (status == HERMON_CMD_INVALID_STATUS) {
2207 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2208 }
2209 return (ibc_get_ci_failure(0));
2210 }
2211 qpc->pri_addr_path.sched_q = tmp_sched_q;
2212 qpc->alt_addr_path.sched_q = tmp_alt_sched_q;
2213
2214 /*
2215 * Fill in the additional QP info based on the QP's transport type.
2216 */
2217 if (qp->qp_type == IBT_UD_RQP) {
2218
2219 /* Fill in the UD-specific info */
2220 ud = &attr_p->qp_info.qp_transport.ud;
2221 ud->ud_qkey = (ib_qkey_t)qpc->qkey;
2222 ud->ud_sq_psn = qpc->next_snd_psn;
2223 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
2224 /* port+1 for port 1/2 */
2225 ud->ud_port =
2226 (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);
2227
2228 attr_p->qp_info.qp_trans = IBT_UD_SRV;
2229
2230 if (qp->qp_serv_type == HERMON_QP_FEXCH) {
2231 ibt_pmr_desc_t *pmr;
2232 uint64_t heart_beat;
2233
2234 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr))
2235 pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
2236 pmr->pmd_iova = 0;
2237 pmr->pmd_lkey = pmr->pmd_rkey =
2238 hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
2239 pmr->pmd_phys_buf_list_sz =
2240 state->hs_fcoib.hfc_mtts_per_mpt;
2241 pmr->pmd_sync_required = 0;
2242
2243 pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
2244 pmr->pmd_iova = 0;
2245 pmr->pmd_lkey = 0;
2246 pmr->pmd_rkey = 0;
2247 pmr->pmd_phys_buf_list_sz = 0;
2248 pmr->pmd_sync_required = 0;
2249
2250 attr_p->qp_query_fexch.fq_flags =
2251 ((hermon_get_heart_beat_rq_cmd_post(state,
2252 qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
2253 (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
2254 IBT_FEXCH_NO_FLAGS;
2255
2256 ud->ud_fc = qp->qp_fc_attr;
2257 } else if (qp->qp_serv_type == HERMON_QP_FCMND ||
2258 qp->qp_serv_type == HERMON_QP_RFCI) {
2259 ud->ud_fc = qp->qp_fc_attr;
2260 }
2261
2262 } else if (qp->qp_serv_type == HERMON_QP_RC) {
2263
2264 /* Fill in the RC-specific info */
2265 rc = &attr_p->qp_info.qp_transport.rc;
2266 rc->rc_sq_psn = qpc->next_snd_psn;
2267 rc->rc_rq_psn = qpc->next_rcv_psn;
2268 rc->rc_dst_qpn = qpc->rem_qpn;
2269
2270 /* Grab the path migration state information */
2271 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2272 rc->rc_mig_state = IBT_STATE_MIGRATED;
2273 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2274 rc->rc_mig_state = IBT_STATE_REARMED;
2275 } else {
2276 rc->rc_mig_state = IBT_STATE_ARMED;
2277 }
2278 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
2279 rc->rc_rdma_ra_in = (1 << qpc->rra_max);
2280 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
2281 rc->rc_path_mtu = qpc->mtu;
2282 rc->rc_retry_cnt = qpc->retry_cnt;
2283
2284 /* Get the common primary address path fields */
2285 qpc_path = &qpc->pri_addr_path;
2286 path_ptr = &rc->rc_path;
2287 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2288 HERMON_ADDRPATH_QP);
2289
2290 /* Fill in the additional primary address path fields */
2291 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2292 path_ptr->cep_hca_port_num =
2293 path_ptr->cep_adds_vect.av_port_num =
2294 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2295 path_ptr->cep_timeout = qpc_path->ack_timeout;
2296
2297 /* Get the common alternate address path fields */
2298 qpc_alt_path = &qpc->alt_addr_path;
2299 alt_path_ptr = &rc->rc_alt_path;
2300 hermon_get_addr_path(state, qpc_alt_path,
2301 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2302
2303 /* Fill in the additional alternate address path fields */
2304 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2305 alt_path_ptr->cep_hca_port_num =
2306 alt_path_ptr->cep_adds_vect.av_port_num =
2307 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2308 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;
2309
2310 /* Get the RNR retry count from the primary path */
2311 rc->rc_rnr_retry_cnt = qpc->rnr_retry;
2312
2313 /* Set the enable flags based on RDMA/Atomic enable bits */
2314 enable_flags = IBT_CEP_NO_FLAGS;
2315 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
2316 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2317 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
2318 attr_p->qp_info.qp_flags = enable_flags;
2319
2320 attr_p->qp_info.qp_trans = IBT_RC_SRV;
2321
2322 } else if (qp->qp_serv_type == HERMON_QP_UC) {
2323
2324 /* Fill in the UC-specific info */
2325 uc = &attr_p->qp_info.qp_transport.uc;
2326 uc->uc_sq_psn = qpc->next_snd_psn;
2327 uc->uc_rq_psn = qpc->next_rcv_psn;
2328 uc->uc_dst_qpn = qpc->rem_qpn;
2329
2330 /* Grab the path migration state information */
2331 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2332 uc->uc_mig_state = IBT_STATE_MIGRATED;
2333 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2334 uc->uc_mig_state = IBT_STATE_REARMED;
2335 } else {
2336 uc->uc_mig_state = IBT_STATE_ARMED;
2337 }
2338 uc->uc_path_mtu = qpc->mtu;
2339
2340 /* Get the common primary address path fields */
2341 qpc_path = &qpc->pri_addr_path;
2342 path_ptr = &uc->uc_path;
2343 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2344 HERMON_ADDRPATH_QP);
2345
2346 /* Fill in the additional primary address path fields */
2347 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2348 path_ptr->cep_hca_port_num =
2349 path_ptr->cep_adds_vect.av_port_num =
2350 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2351
2352 /* Get the common alternate address path fields */
2353 qpc_alt_path = &qpc->alt_addr_path;
2354 alt_path_ptr = &uc->uc_alt_path;
2355 hermon_get_addr_path(state, qpc_alt_path,
2356 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2357
2358 /* Fill in the additional alternate address path fields */
2359 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2360 alt_path_ptr->cep_hca_port_num =
2361 alt_path_ptr->cep_adds_vect.av_port_num =
2362 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2363
2364 /*
2365 * Set the enable flags based on RDMA enable bits (by
2366 * definition UC doesn't support Atomic or RDMA Read)
2367 */
2368 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2369 attr_p->qp_info.qp_flags = enable_flags;
2370
2371 attr_p->qp_info.qp_trans = IBT_UC_SRV;
2372
2373 } else {
2374 HERMON_WARNING(state, "unexpected QP transport type");
2375 mutex_exit(&qp->qp_lock);
2376 return (ibc_get_ci_failure(0));
2377 }
2378
2379 /*
2380 * Under certain circumstances it is possible for the Hermon hardware
2381 * to transition to one of the error states without software directly
2382 * knowing about it. The QueryQP() call is the one place where we
2383 * have an opportunity to sample and update our view of the QP state.
2384 */
2385 if (qpc->state == HERMON_QP_SQERR) {
2386 attr_p->qp_info.qp_state = IBT_STATE_SQE;
2387 qp->qp_state = HERMON_QP_SQERR;
2388 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
2389 }
2390 if (qpc->state == HERMON_QP_ERR) {
2391 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
2392 qp->qp_state = HERMON_QP_ERR;
2393 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
2394 }
2395 mutex_exit(&qp->qp_lock);
2396
2397 return (DDI_SUCCESS);
2398 }
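/*
 * Editor's illustrative sketch (not driver code): the port decode
 * used repeatedly in hermon_qp_query() above.  Bit 6 of the QPC
 * address path "sched_q" field selects the HCA port, and IBTF
 * numbers ports from 1.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static uint8_t
sched_q_to_port(uint32_t sched_q)
{
	return ((uint8_t)(((sched_q >> 6) & 0x01) + 1));
}

int
main(void)
{
	printf("sched_q 0x00 -> port %u\n", sched_q_to_port(0x00));
	printf("sched_q 0x40 -> port %u\n", sched_q_to_port(0x40));
	return (0);
}
#endif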
2399
2400
2401 /*
2402 * hermon_qp_create_qpn()
2403 * Context: Can be called from interrupt or base context.
2404 */
2405 static int
2406 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
2407 hermon_rsrc_t *qpc)
2408 {
2409 hermon_qpn_entry_t query;
2410 hermon_qpn_entry_t *entry;
2411 avl_index_t where;
2412
2413 /*
2414 * Build a query (for the AVL tree lookup) and attempt to find
2415 * a previously added entry that has a matching QPC index. If
2416 * no matching entry is found, then allocate, initialize, and
2417 * add an entry to the AVL tree.
2418 * If a matching entry is found, then increment its QPN counter
2419 * and reference counter.
2420 */
2421 query.qpn_indx = qpc->hr_indx;
2422 mutex_enter(&state->hs_qpn_avl_lock);
2423 entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
2424 &query, &where);
2425 if (entry == NULL) {
2426 /*
2427 * Allocate and initialize a QPN entry, then insert
2428 * it into the AVL tree.
2429 */
2430 entry = (hermon_qpn_entry_t *)kmem_zalloc(
2431 sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
2432 if (entry == NULL) {
2433 mutex_exit(&state->hs_qpn_avl_lock);
2434 return (DDI_FAILURE);
2435 }
2436 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
2437
2438 entry->qpn_indx = qpc->hr_indx;
2439 entry->qpn_refcnt = 0;
2440 entry->qpn_counter = 0;
2441
2442 avl_insert(&state->hs_qpn_avl, entry, where);
2443 }
2444
2445 /*
2446 * Make the AVL tree entry point to the QP context resource that
2447 * it will be responsible for tracking
2448 */
2449 entry->qpn_qpc = qpc;
2450
2451 /*
2452 * Setup the QP handle to point to the AVL tree entry. Then
2453 * generate the new QP number from the entry's QPN counter value
2454 * and the hardware's QP context table index.
2455 */
2456 qp->qp_qpn_hdl = entry;
2457 qp->qp_qpnum = ((entry->qpn_counter <<
2458 state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
2459 HERMON_QP_MAXNUMBER_MSK;
2460 qp->qp_ring = qp->qp_qpnum << 8;
2461
2462 /*
2463 * Increment the reference counter and QPN counter. The QPN
2464 * counter always indicates the next available number for use.
2465 */
2466 entry->qpn_counter++;
2467 entry->qpn_refcnt++;
2468
2469 mutex_exit(&state->hs_qpn_avl_lock);
2470
2471 return (DDI_SUCCESS);
2472 }
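/*
 * Editor's illustrative sketch (not driver code; the mask value is
 * implied by the 24-bit QPN arithmetic elsewhere in this file): the
 * low log_num_qp bits of a QPN are constrained to the QPC table
 * index, and the per-index counter fills the unconstrained high
 * bits, so reusing an index yields a fresh QPN each time.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define	QP_MAXNUMBER_MSK	0xFFFFFF	/* QPNs are 24 bits wide */

int
main(void)
{
	uint32_t log_num_qp = 16, indx = 0x1234, counter;

	for (counter = 0; counter < 3; counter++) {
		uint32_t qpn = ((counter << log_num_qp) | indx) &
		    QP_MAXNUMBER_MSK;
		printf("counter %u -> QPN 0x%06x\n", counter, qpn);
	}
	return (0);
}
#endif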
2473
2474
2475 /*
2476 * hermon_qp_release_qpn()
2477 * Context: Can be called only from user or kernel context.
2478 */
2479 void
2480 hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
2481 int flags)
2482 {
2483 ASSERT(entry != NULL);
2484
2485 mutex_enter(&state->hs_qpn_avl_lock);
2486
2487 /*
2488 * If we are releasing the QP number here, then we decrement the
2489 * reference count and check for zero references. If there are
2490 * zero references, then we free the QPC context (if it hadn't
2491 * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
2492 * reuse with another similar QP number) and remove the tracking
2493 * structure from the QP number AVL tree and free the structure.
2494 * If we are not releasing the QP number here, then, as long as we
2495 * have not exhausted the usefulness of the QPC context (that is,
2496 * re-used it too many times without the reference count having
2497 * gone to zero), we free up the QPC context for use by another
2498 * thread (which will use it to construct a different QP number
2499 * from the same QPC table index).
2500 */
2501 if (flags == HERMON_QPN_RELEASE) {
2502 entry->qpn_refcnt--;
2503
2504 /*
2505 * If the reference count is zero, then we free the QPC
2506 * context (if it hadn't already been freed in an early
2507 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
2508 * tracking structure from the QP number AVL tree.
2509 */
2510 if (entry->qpn_refcnt == 0) {
2511 if (entry->qpn_qpc != NULL) {
2512 hermon_rsrc_free(state, &entry->qpn_qpc);
2513 }
2514
2515 /*
2516 * If the current entry has served its useful
2517 * purpose (i.e. been reused the maximum allowable
2518 * number of times), then remove it from QP number
2519 * AVL tree and free it up.
2520 */
2521 if (entry->qpn_counter >= (1 <<
2522 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2523 avl_remove(&state->hs_qpn_avl, entry);
2524 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2525 }
2526 }
2527
2528 } else if (flags == HERMON_QPN_FREE_ONLY) {
2529 /*
2530 * Even if we are not freeing the QP number, that will not
2531 * always prevent us from releasing the QPC context. In fact,
2532 * since the QPC context only forms part of the whole QPN,
2533 * we want to free it up for use by other consumers. But
2534 * if the reference count is non-zero (which it will always
2535 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
2536 * has reached its maximum value, then we cannot reuse the
2537 * QPC context until the reference count eventually reaches
2538 * zero (in HERMON_QPN_RELEASE, above).
2539 */
2540 if (entry->qpn_counter < (1 <<
2541 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2542 hermon_rsrc_free(state, &entry->qpn_qpc);
2543 }
2544 }
2545 mutex_exit(&state->hs_qpn_avl_lock);
2546 }
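/*
 * Editor's illustrative sketch (not driver code): the reuse limit
 * tested above.  With 24-bit QPNs and log_num_qp constrained index
 * bits, each QPC index can be handed out 2^(24 - log_num_qp) times
 * before it must wait for its reference count to drain to zero.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned log_num_qp = 16;

	printf("max QPN reuses per QPC index: %u\n",
	    1U << (24 - log_num_qp));		/* 256 in this example */
	return (0);
}
#endif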
2547
2548
2549 /*
2550 * hermon_qpn_avl_compare()
2551 * Context: Can be called from user or kernel context.
2552 */
2553 static int
2554 hermon_qpn_avl_compare(const void *q, const void *e)
2555 {
2556 hermon_qpn_entry_t *entry, *query;
2557
2558 entry = (hermon_qpn_entry_t *)e;
2559 query = (hermon_qpn_entry_t *)q;
2560
2561 if (query->qpn_indx < entry->qpn_indx) {
2562 return (-1);
2563 } else if (query->qpn_indx > entry->qpn_indx) {
2564 return (+1);
2565 } else {
2566 return (0);
2567 }
2568 }
2569
2570
2571 /*
2572 * hermon_qpn_avl_init()
2573 * Context: Only called from attach() path context
2574 */
2575 void
2576 hermon_qpn_avl_init(hermon_state_t *state)
2577 {
2578 /* Initialize the lock used for QP number (QPN) AVL tree access */
2579 mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
2580 DDI_INTR_PRI(state->hs_intrmsi_pri));
2581
2582 /* Initialize the AVL tree for the QP number (QPN) storage */
2583 avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
2584 sizeof (hermon_qpn_entry_t),
2585 offsetof(hermon_qpn_entry_t, qpn_avlnode));
2586 }
2587
2588
2589 /*
2590 * hermon_qpn_avl_fini()
2591 * Context: Only called from attach() and/or detach() path contexts
2592 */
2593 void
2594 hermon_qpn_avl_fini(hermon_state_t *state)
2595 {
2596 hermon_qpn_entry_t *entry;
2597 void *cookie;
2598
2599 /*
2600 * Empty all entries (if necessary) and destroy the AVL tree
2601 * that was used for QP number (QPN) tracking.
2602 */
2603 cookie = NULL;
2604 while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2605 &state->hs_qpn_avl, &cookie)) != NULL) {
2606 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2607 }
2608 avl_destroy(&state->hs_qpn_avl);
2609
2610 /* Destroy the lock used for QP number (QPN) AVL tree access */
2611 mutex_destroy(&state->hs_qpn_avl_lock);
2612 }
2613
2614
2615 /*
2616 * hermon_qphdl_from_qpnum()
2617 * Context: Can be called from interrupt or base context.
2618 *
2619 * This routine is important because changing the unconstrained
2620 * portion of the QP number is critical to the detection of a
2621 * potential race condition in the QP event handler code (i.e. the case
2622 * where a QP is freed and alloc'd again before an event for the
2623 * "old" QP can be handled).
2624 *
2625 * While this is not a perfect solution (not sure that one exists)
2626 * it does help to mitigate the chance that this race condition will
2627 * cause us to deliver a "stale" event to the new QP owner. Note:
2628 * this solution does not scale well because the number of constrained
2629 * bits increases (and, hence, the number of unconstrained bits
2630 * decreases) as the number of supported QPs grows. For small and
2631 * intermediate values, it should hopefully provide sufficient
2632 * protection.
2633 */
2634 hermon_qphdl_t
2635 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2636 {
2637 uint_t qpindx, qpmask;
2638
2639 /* Calculate the QP table index from the qpnum */
2640 qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2641 qpindx = qpnum & qpmask;
2642 return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2643 }
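/*
 * Editor's illustrative sketch (not driver code): the inverse of the
 * QPN construction, as performed above.  Masking off the
 * unconstrained counter bits recovers the QPC table index, while a
 * stale event for a previous user of the index carries a different
 * counter value and so a different QPN.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t log_num_qp = 16;
	uint32_t qpmask = (1U << log_num_qp) - 1;
	uint32_t qpnum = 0x021234;	/* counter 2, index 0x1234 */

	printf("QPN 0x%06x -> QPC index 0x%x\n", qpnum, qpnum & qpmask);
	return (0);
}
#endif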
2644
2645
2646 /*
2647 * hermon_special_qp_rsrc_alloc
2648 * Context: Can be called from interrupt or base context.
2649 */
2650 static int
2651 hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
2652 uint_t port, hermon_rsrc_t **qp_rsrc)
2653 {
2654 uint_t mask, flags;
2655 int status;
2656
2657 mutex_enter(&state->hs_spec_qplock);
2658 flags = state->hs_spec_qpflags;
2659 if (type == IBT_SMI_SQP) {
2660 /*
2661 * Check here to see if the driver has been configured
2662 * to instruct the Hermon firmware to handle all incoming
2663 * SMP messages (i.e. messages sent to SMA). If so,
2664 * then we will treat QP0 as if it has already been
2665 * allocated (for internal use). Otherwise, if we allow
2666 * the allocation to happen, it will cause unexpected
2667 * behaviors (e.g. Hermon SMA becomes unresponsive).
2668 */
2669 if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
2670 mutex_exit(&state->hs_spec_qplock);
2671 return (IBT_QP_IN_USE);
2672 }
2673
2674 /*
2675 * If this is the first QP0 allocation, then post
2676 * a CONF_SPECIAL_QP firmware command
2677 */
2678 if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
2679 status = hermon_conf_special_qp_cmd_post(state,
2680 state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
2681 HERMON_CMD_NOSLEEP_SPIN,
2682 HERMON_CMD_SPEC_QP_OPMOD(
2683 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2684 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2685 if (status != HERMON_CMD_SUCCESS) {
2686 mutex_exit(&state->hs_spec_qplock);
2687 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2688 "command failed: %08x\n",
2689 state->hs_instance, status);
2690 return (IBT_INSUFF_RESOURCE);
2691 }
2692 }
2693
2694 /*
2695 * Now check (and, if necessary, modify) the flags to indicate
2696 * whether the allocation was successful
2697 */
2698 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2699 if (flags & mask) {
2700 mutex_exit(&state->hs_spec_qplock);
2701 return (IBT_QP_IN_USE);
2702 }
2703 state->hs_spec_qpflags |= mask;
2704 *qp_rsrc = state->hs_spec_qp0;
2705
2706 } else {
2707 /*
2708 * If this is the first QP1 allocation, then post
2709 * a CONF_SPECIAL_QP firmware command
2710 */
2711 if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
2712 status = hermon_conf_special_qp_cmd_post(state,
2713 state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
2714 HERMON_CMD_NOSLEEP_SPIN,
2715 HERMON_CMD_SPEC_QP_OPMOD(
2716 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2717 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2718 if (status != HERMON_CMD_SUCCESS) {
2719 mutex_exit(&state->hs_spec_qplock);
2720 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2721 "command failed: %08x\n",
2722 state->hs_instance, status);
2723 return (IBT_INSUFF_RESOURCE);
2724 }
2725 }
2726
2727 /*
2728 * Now check (and, if necessary, modify) the flags to indicate
2729 * whether the allocation was successful
2730 */
2731 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2732 if (flags & mask) {
2733 mutex_exit(&state->hs_spec_qplock);
2734 return (IBT_QP_IN_USE);
2735 }
2736 state->hs_spec_qpflags |= mask;
2737 *qp_rsrc = state->hs_spec_qp1;
2738 }
2739
2740 mutex_exit(&state->hs_spec_qplock);
2741 return (DDI_SUCCESS);
2742 }
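/*
 * Editor's illustrative sketch (not driver code; the bit positions
 * below are made up, not the driver's actual HERMON_SPECIAL_QP*_RSRC
 * values): the special-QP bookkeeping above keeps one bit per
 * (QP0/QP1, port) pair in a single flags word.
 */
#if 0
#include <stdio.h>

#define	SQP0_RSRC	0		/* QP0 bits, one per port */
#define	SQP1_RSRC	2		/* QP1 bits, one per port */

int
main(void)
{
	unsigned flags = 0, port = 1;
	unsigned mask = 1U << (SQP0_RSRC + port);

	if (flags & mask) {
		printf("QP0 on port %u already allocated\n", port);
		return (1);
	}
	flags |= mask;			/* allocate */
	flags &= ~mask;			/* free */
	printf("flags now 0x%x\n", flags);
	return (0);
}
#endif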
2743
2744
2745 /*
2746 * hermon_special_qp_rsrc_free
2747 * Context: Can be called from interrupt or base context.
2748 */
2749 static int
2750 hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
2751 uint_t port)
2752 {
2753 uint_t mask, flags;
2754 int status;
2755
2756 mutex_enter(&state->hs_spec_qplock);
2757 if (type == IBT_SMI_SQP) {
2758 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2759 state->hs_spec_qpflags &= ~mask;
2760 flags = state->hs_spec_qpflags;
2761
2762 /*
2763 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
2764 * firmware command now; if this is the last special QP free,
2765 * it will stop them all.
2766 */
2767 if (flags) {
2768 status = hermon_conf_special_qp_cmd_post(state, 0,
2769 HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
2770 if (status != HERMON_CMD_SUCCESS) {
2771 mutex_exit(&state->hs_spec_qplock);
2772 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2773 "command failed: %08x\n",
2774 state->hs_instance, status);
2775 if (status == HERMON_CMD_INVALID_STATUS) {
2776 hermon_fm_ereport(state, HCA_SYS_ERR,
2777 HCA_ERR_SRV_LOST);
2778 }
2779 return (ibc_get_ci_failure(0));
2780 }
2781 }
2782 } else {
2783 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2784 state->hs_spec_qpflags &= ~mask;
2785 flags = state->hs_spec_qpflags;
2786
2787 /*
2788 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
2789 * firmware command now; if this is the last special QP free,
2790 * it will stop them all.
2791 */
2792 if (flags) {
2793 status = hermon_conf_special_qp_cmd_post(state, 0,
2794 HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
2795 if (status != HERMON_CMD_SUCCESS) {
2796 mutex_exit(&state->hs_spec_qplock);
2797 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2798 "command failed: %08x\n",
2799 state->hs_instance, status);
2800 if (status == HERMON_CMD_INVALID_STATUS) {
2801 hermon_fm_ereport(state, HCA_SYS_ERR,
2802 HCA_ERR_SRV_LOST);
2803 }
2804 return (ibc_get_ci_failure(0));
2805 }
2806 }
2807 }
2808
2809 mutex_exit(&state->hs_spec_qplock);
2810 return (DDI_SUCCESS);
2811 }
2812
2813
2814 /*
2815 * hermon_qp_sgl_to_logwqesz()
2816 * Context: Can be called from interrupt or base context.
2817 */
2818 static void
2819 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2820 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2821 uint_t *logwqesz, uint_t *max_sgl)
2822 {
2823 uint_t max_size, log2, actual_sgl;
2824
2825 switch (wq_type) {
2826 case HERMON_QP_WQ_TYPE_SENDQ_UD:
2827 /*
2828 * Use requested maximum SGL to calculate max descriptor size
2829 * (while guaranteeing that the descriptor size is a
2830 * power-of-2 cachelines).
2831 */
2832 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2833 log2 = highbit(max_size);
2834 if ((max_size & (max_size - 1)) == 0) {
2835 log2 = log2 - 1;
2836 }
2837
2838 /* Make sure descriptor is at least the minimum size */
2839 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2840
2841 /* Calculate actual number of SGL (given WQE size) */
2842 actual_sgl = ((1 << log2) -
2843 sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2844 break;
2845
2846 case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2847 /*
2848 * Use requested maximum SGL to calculate max descriptor size
2849 * (while guaranteeing that the descriptor size is a
2850 * power-of-2 cachelines).
2851 */
2852 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2853 log2 = highbit(max_size);
2854 if ((max_size & (max_size - 1)) == 0) {
2855 log2 = log2 - 1;
2856 }
2857
2858 /* Make sure descriptor is at least the minimum size */
2859 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2860
2861 /* Calculate actual number of SGL (given WQE size) */
2862 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2863 break;
2864
2865 case HERMON_QP_WQ_TYPE_RECVQ:
2866 /*
2867 * Same as above (except for Recv WQEs)
2868 */
2869 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2870 log2 = highbit(max_size);
2871 if ((max_size & (max_size - 1)) == 0) {
2872 log2 = log2 - 1;
2873 }
2874
2875 /* Make sure descriptor is at least the minimum size */
2876 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2877
2878 /* Calculate actual number of SGL (given WQE size) */
2879 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2880 break;
2881
2882 case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2883 /*
2884 * Same as above (except for MLX transport WQEs). For these
2885 * WQEs we have to account for the space consumed by the
2886 * "inline" packet headers. (This is smaller than for QP1
2887 * below because QP0 is not allowed to send packets with a GRH.)
2888 */
2889 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2890 log2 = highbit(max_size);
2891 if ((max_size & (max_size - 1)) == 0) {
2892 log2 = log2 - 1;
2893 }
2894
2895 /* Make sure descriptor is at least the minimum size */
2896 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2897
2898 /* Calculate actual number of SGL (given WQE size) */
2899 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2900 break;
2901
2902 case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2903 /*
2904 * Same as above. For these WQEs we again have to account for
2905 * the space consumed by the "inline" packet headers. (This
2906 * is larger than for QP0 above because we have to account for
2907 * the possibility of a GRH in each packet - and this
2908 * introduces an alignment issue that causes us to consume
2909 * an additional 8 bytes).
2910 */
2911 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2912 log2 = highbit(max_size);
2913 if ((max_size & (max_size - 1)) == 0) {
2914 log2 = log2 - 1;
2915 }
2916
2917 /* Make sure descriptor is at least the minimum size */
2918 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2919
2920 /* Calculate actual number of SGL (given WQE size) */
2921 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2922 break;
2923
2924 default:
2925 HERMON_WARNING(state, "unexpected work queue type");
/* Avoid returning uninitialized values on this (unexpected) path */
log2 = HERMON_QP_WQE_LOG_MINIMUM;
actual_sgl = 0;
2926 break;
2927 }
2928
2929 /* Fill in the return values */
2930 *logwqesz = log2;
2931 *max_sgl = min(real_max_sgl, actual_sgl);
2932 }
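/*
 * Editor's illustrative sketch (not driver code; the 64-byte header
 * size stands in for the driver's HERMON_QP_WQE_* constants): round
 * the descriptor size (headers plus 16 bytes per SGL entry) up to a
 * power of two, then report how many SGL entries actually fit in the
 * rounded WQE, mirroring the computation in the function above.
 */
#if 0
#include <stdio.h>

static unsigned
ceil_log2(unsigned v)
{
	unsigned log2 = 0;

	while ((1U << log2) < v)
		log2++;
	return (log2);
}

int
main(void)
{
	unsigned hdrs = 64, num_sgl = 8;
	unsigned max_size = hdrs + (num_sgl << 4);	/* 192 bytes */
	unsigned log2 = ceil_log2(max_size);		/* -> 8 (256B) */
	unsigned actual_sgl = ((1U << log2) - hdrs) >> 4;

	printf("log WQE size %u (%u bytes), %u SGL entries fit\n",
	    log2, 1U << log2, actual_sgl);
	return (0);
}
#endif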
2933