Lines Matching +full:ctrl +full:- +full:len

61 return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); in get_recv_wqe()
66 return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); in get_send_wqe()
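
Both helpers above locate a work queue entry purely by shift arithmetic: every WQE is (1 << wqe_shift) bytes, so entry n sits n << wqe_shift bytes past the ring's offset within the QP buffer. A minimal stand-alone sketch of the same computation (names and the example shift are illustrative, not taken from the driver):

#include <stddef.h>

/* Sketch only: with a hypothetical wqe_shift of 6, each entry is 64 bytes,
 * so entry 3 starts 192 bytes past the ring's base offset. */
static inline void *wqe_at(void *buf, size_t offset, unsigned n, unsigned wqe_shift)
{
	return (char *)buf + offset + ((size_t)n << wqe_shift);
}
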
78 int ds = (((struct mlx4_wqe_ctrl_seg *)wqe)->fence_size & 0x3f) << 2; in stamp_send_wqe()
86 qp->sq.head = 0; in mlx4_init_qp_indices()
87 qp->sq.tail = 0; in mlx4_init_qp_indices()
88 qp->rq.head = 0; in mlx4_init_qp_indices()
89 qp->rq.tail = 0; in mlx4_init_qp_indices()
94 struct mlx4_wqe_ctrl_seg *ctrl; in mlx4_qp_init_sq_ownership() local
97 for (i = 0; i < qp->sq.wqe_cnt; ++i) { in mlx4_qp_init_sq_ownership()
98 ctrl = get_send_wqe(qp, i); in mlx4_qp_init_sq_ownership()
99 ctrl->owner_opcode = htobe32(1 << 31); in mlx4_qp_init_sq_ownership()
100 ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); in mlx4_qp_init_sq_ownership()
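
The loop above initializes every send WQE's owner_opcode with bit 31 set and stores the WQE size in 16-byte units in fence_size. The same bit 31 is what the mlx4_post_send() hits further down derive from the producer index (source line 454); a hedged sketch of that derivation, assuming wqe_cnt is a power of two as the driver requires:

#include <endian.h>
#include <stdint.h>

/* Sketch only: (ind & wqe_cnt) toggles every wqe_cnt postings, flipping
 * the ownership bit written into owner_opcode on each pass of the ring. */
static inline uint32_t owner_bit(unsigned ind, unsigned wqe_cnt)
{
	return (ind & wqe_cnt) ? htobe32(1U << 31) : 0;
}
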
110 cur = wq->head - wq->tail; in wq_overflow()
111 if (cur + nreq < wq->max_post) in wq_overflow()
114 pthread_spin_lock(&cq->lock); in wq_overflow()
115 cur = wq->head - wq->tail; in wq_overflow()
116 pthread_spin_unlock(&cq->lock); in wq_overflow()
118 return cur + nreq >= wq->max_post; in wq_overflow()
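
wq_overflow() above relies on head and tail being free-running unsigned counters: head - tail is the number of outstanding WQEs even after either counter wraps, and the CQ lock is taken only to re-read a fresh tail when the lockless fast-path test fails. A stand-alone sketch of that test (assumed scalar arguments rather than the driver's structs):

#include <stdbool.h>
#include <stdint.h>

/* Unsigned subtraction gives the count of posted-but-unreaped WQEs even
 * after the 32-bit counters wrap; nreq is the batch about to be posted. */
static bool sq_would_overflow(uint32_t head, uint32_t tail,
			      unsigned nreq, unsigned max_post)
{
	return (uint32_t)(head - tail) + nreq >= max_post;
}
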
123 int acc = wr->bind_mw.bind_info.mw_access_flags; in set_bind_seg()
124 bseg->flags1 = 0; in set_bind_seg()
126 bseg->flags1 |= htobe32(MLX4_WQE_MW_ATOMIC); in set_bind_seg()
128 bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_WRITE); in set_bind_seg()
130 bseg->flags1 |= htobe32(MLX4_WQE_MW_REMOTE_READ); in set_bind_seg()
132 bseg->flags2 = 0; in set_bind_seg()
133 if (((struct ibv_mw *)(wr->bind_mw.mw))->type == IBV_MW_TYPE_2) in set_bind_seg()
134 bseg->flags2 |= htobe32(MLX4_WQE_BIND_TYPE_2); in set_bind_seg()
136 bseg->flags2 |= htobe32(MLX4_WQE_BIND_ZERO_BASED); in set_bind_seg()
138 bseg->new_rkey = htobe32(wr->bind_mw.rkey); in set_bind_seg()
139 bseg->lkey = htobe32(wr->bind_mw.bind_info.mr->lkey); in set_bind_seg()
140 bseg->addr = htobe64((uint64_t) wr->bind_mw.bind_info.addr); in set_bind_seg()
141 bseg->length = htobe64(wr->bind_mw.bind_info.length); in set_bind_seg()
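
set_bind_seg() is fed from the bind_mw part of an application's work request. A hedged, application-side sketch of a request that ends up here, posting a type-2 memory-window bind (qp, mw, mr and buf are assumed to already exist and be properly created/registered; names are illustrative):

#include <infiniband/verbs.h>
#include <stdint.h>

static int post_mw_bind(struct ibv_qp *qp, struct ibv_mw *mw,
			struct ibv_mr *mr, void *buf, uint64_t len)
{
	struct ibv_send_wr wr = {}, *bad_wr;

	wr.wr_id                             = 1;
	wr.opcode                            = IBV_WR_BIND_MW;
	wr.send_flags                        = IBV_SEND_SIGNALED;
	wr.bind_mw.mw                        = mw;                      /* type-2 window */
	wr.bind_mw.rkey                      = ibv_inc_rkey(mw->rkey);  /* next rkey for the window */
	wr.bind_mw.bind_info.mr              = mr;
	wr.bind_mw.bind_info.addr            = (uint64_t)(uintptr_t)buf;
	wr.bind_mw.bind_info.length          = len;
	wr.bind_mw.bind_info.mw_access_flags = IBV_ACCESS_REMOTE_WRITE;

	return ibv_post_send(qp, &wr, &bad_wr);
}
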
147 iseg->mem_key = htobe32(rkey); in set_local_inv_seg()
149 iseg->reserved1 = 0; in set_local_inv_seg()
150 iseg->reserved2 = 0; in set_local_inv_seg()
151 iseg->reserved3[0] = 0; in set_local_inv_seg()
152 iseg->reserved3[1] = 0; in set_local_inv_seg()
158 rseg->raddr = htobe64(remote_addr); in set_raddr_seg()
159 rseg->rkey = htobe32(rkey); in set_raddr_seg()
160 rseg->reserved = 0; in set_raddr_seg()
165 if (wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP) { in set_atomic_seg()
166 aseg->swap_add = htobe64(wr->wr.atomic.swap); in set_atomic_seg()
167 aseg->compare = htobe64(wr->wr.atomic.compare_add); in set_atomic_seg()
169 aseg->swap_add = htobe64(wr->wr.atomic.compare_add); in set_atomic_seg()
170 aseg->compare = 0; in set_atomic_seg()
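
set_atomic_seg() consumes the wr.atomic part of the work request; both atomic opcodes operate on a single 8-byte word at the responder, and the old remote value is scattered into the local SGE. A hedged application-side sketch (qp and mr assumed valid, result pointing at a registered 8-byte buffer):

#include <infiniband/verbs.h>
#include <stdint.h>

static int post_cmp_swap(struct ibv_qp *qp, struct ibv_mr *mr, uint64_t *result,
			 uint64_t remote_addr, uint32_t rkey,
			 uint64_t expected, uint64_t desired)
{
	struct ibv_sge sge = {
		.addr   = (uint64_t)(uintptr_t)result,  /* old remote value lands here */
		.length = sizeof(*result),
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr = {}, *bad_wr;

	wr.wr_id                 = 2;
	wr.opcode                = IBV_WR_ATOMIC_CMP_AND_SWP;
	wr.send_flags            = IBV_SEND_SIGNALED;
	wr.sg_list               = &sge;
	wr.num_sge               = 1;
	wr.wr.atomic.remote_addr = remote_addr;
	wr.wr.atomic.rkey        = rkey;
	wr.wr.atomic.compare_add = expected;  /* compared against the remote word */
	wr.wr.atomic.swap        = desired;   /* written only if the compare matches */

	return ibv_post_send(qp, &wr, &bad_wr);
}
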
178 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); in set_datagram_seg()
179 dseg->dqpn = htobe32(wr->wr.ud.remote_qpn); in set_datagram_seg()
180 dseg->qkey = htobe32(wr->wr.ud.remote_qkey); in set_datagram_seg()
181 dseg->vlan = htobe16(to_mah(wr->wr.ud.ah)->vlan); in set_datagram_seg()
182 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->mac, 6); in set_datagram_seg()
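
set_datagram_seg() copies the address vector out of the AH and takes the destination QPN and Q_Key from wr.ud. A hedged sketch of the application-side fields involved (ah assumed to come from ibv_create_ah(); the scatter list is filled exactly as for a connected QP):

#include <infiniband/verbs.h>
#include <stdint.h>

/* Only the UD-specific routing fields are shown. */
static void fill_ud_wr(struct ibv_send_wr *wr, struct ibv_ah *ah,
		       uint32_t remote_qpn, uint32_t remote_qkey)
{
	wr->opcode            = IBV_WR_SEND;
	wr->wr.ud.ah          = ah;
	wr->wr.ud.remote_qpn  = remote_qpn;
	wr->wr.ud.remote_qkey = remote_qkey;
}
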
187 dseg->byte_count = htobe32(sg->length); in __set_data_seg()
188 dseg->lkey = htobe32(sg->lkey); in __set_data_seg()
189 dseg->addr = htobe64(sg->addr); in __set_data_seg()
194 dseg->lkey = htobe32(sg->lkey); in set_data_seg()
195 dseg->addr = htobe64(sg->addr); in set_data_seg()
201 * a new cacheline, the HCA prefetcher could grab the 64-byte in set_data_seg()
207 if (likely(sg->length)) in set_data_seg()
208 dseg->byte_count = htobe32(sg->length); in set_data_seg()
210 dseg->byte_count = htobe32(0x80000000); in set_data_seg()
219 struct mlx4_wqe_ctrl_seg *ctrl = NULL; in mlx4_post_send() local
227 pthread_spin_lock(&qp->sq.lock); in mlx4_post_send()
231 ind = qp->sq.head; in mlx4_post_send()
233 for (nreq = 0; wr; ++nreq, wr = wr->next) { in mlx4_post_send()
234 if (wq_overflow(&qp->sq, nreq, to_mcq(ibqp->send_cq))) { in mlx4_post_send()
240 if (wr->num_sge > qp->sq.max_gs) { in mlx4_post_send()
246 if (wr->opcode >= sizeof mlx4_ib_opcode / sizeof mlx4_ib_opcode[0]) { in mlx4_post_send()
252 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); in mlx4_post_send()
253 qp->sq.wrid[ind & (qp->sq.wqe_cnt - 1)] = wr->wr_id; in mlx4_post_send()
255 ctrl->srcrb_flags = in mlx4_post_send()
256 (wr->send_flags & IBV_SEND_SIGNALED ? in mlx4_post_send()
258 (wr->send_flags & IBV_SEND_SOLICITED ? in mlx4_post_send()
260 qp->sq_signal_bits; in mlx4_post_send()
262 if (wr->opcode == IBV_WR_SEND_WITH_IMM || in mlx4_post_send()
263 wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM) in mlx4_post_send()
264 ctrl->imm = wr->imm_data; in mlx4_post_send()
266 ctrl->imm = 0; in mlx4_post_send()
268 wqe += sizeof *ctrl; in mlx4_post_send()
269 size = sizeof *ctrl / 16; in mlx4_post_send()
271 switch (ibqp->qp_type) { in mlx4_post_send()
273 ctrl->srcrb_flags |= MLX4_REMOTE_SRQN_FLAGS(wr); in mlx4_post_send()
277 switch (wr->opcode) { in mlx4_post_send()
280 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, in mlx4_post_send()
281 wr->wr.atomic.rkey); in mlx4_post_send()
296 if (!wr->num_sge) in mlx4_post_send()
298 set_raddr_seg(wqe, wr->wr.rdma.remote_addr, in mlx4_post_send()
299 wr->wr.rdma.rkey); in mlx4_post_send()
305 ctrl->srcrb_flags |= in mlx4_post_send()
307 set_local_inv_seg(wqe, wr->imm_data); in mlx4_post_send()
314 ctrl->srcrb_flags |= in mlx4_post_send()
323 ctrl->imm = htobe32(wr->imm_data); in mlx4_post_send()
337 if (wr->send_flags & IBV_SEND_IP_CSUM) { in mlx4_post_send()
338 if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_UD_OVER_IB)) { in mlx4_post_send()
343 ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM | in mlx4_post_send()
351 ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_SOLICIT); in mlx4_post_send()
352 if (wr->send_flags & IBV_SEND_IP_CSUM) { in mlx4_post_send()
353 if (!(qp->qp_cap_cache & MLX4_CSUM_SUPPORT_RAW_OVER_ETH)) { in mlx4_post_send()
358 ctrl->srcrb_flags |= htobe32(MLX4_WQE_CTRL_IP_HDR_CSUM | in mlx4_post_send()
367 if (wr->send_flags & IBV_SEND_INLINE && wr->num_sge) { in mlx4_post_send()
370 int len, seg_len; in mlx4_post_send() local
378 off = ((uintptr_t) wqe) & (MLX4_INLINE_ALIGN - 1); in mlx4_post_send()
382 for (i = 0; i < wr->num_sge; ++i) { in mlx4_post_send()
383 addr = (void *) (uintptr_t) wr->sg_list[i].addr; in mlx4_post_send()
384 len = wr->sg_list[i].length; in mlx4_post_send()
385 inl += len; in mlx4_post_send()
387 if (inl > qp->max_inline_data) { in mlx4_post_send()
394 while (len >= MLX4_INLINE_ALIGN - off) { in mlx4_post_send()
395 to_copy = MLX4_INLINE_ALIGN - off; in mlx4_post_send()
397 len -= to_copy; in mlx4_post_send()
402 seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len); in mlx4_post_send()
410 memcpy(wqe, addr, len); in mlx4_post_send()
411 wqe += len; in mlx4_post_send()
412 seg_len += len; in mlx4_post_send()
413 off += len; in mlx4_post_send()
423 * 64-byte chunk with this inline in mlx4_post_send()
430 seg->byte_count = htobe32(MLX4_INLINE_SEG | seg_len); in mlx4_post_send()
437 for (i = wr->num_sge - 1; i >= 0 ; --i) in mlx4_post_send()
438 set_data_seg(seg + i, wr->sg_list + i); in mlx4_post_send()
440 size += wr->num_sge * (sizeof *seg / 16); in mlx4_post_send()
443 ctrl->fence_size = (wr->send_flags & IBV_SEND_FENCE ? in mlx4_post_send()
453 ctrl->owner_opcode = htobe32(mlx4_ib_opcode[wr->opcode]) | in mlx4_post_send()
454 (ind & qp->sq.wqe_cnt ? htobe32(1 << 31) : 0); in mlx4_post_send()
461 if (wr->next) in mlx4_post_send()
462 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes) & in mlx4_post_send()
463 (qp->sq.wqe_cnt - 1)); in mlx4_post_send()
469 ctx = to_mctx(ibqp->context); in mlx4_post_send()
471 if (nreq == 1 && inl && size > 1 && size <= ctx->bf_buf_size / 16) { in mlx4_post_send()
472 ctrl->owner_opcode |= htobe32((qp->sq.head & 0xffff) << 8); in mlx4_post_send()
474 ctrl->bf_qpn |= qp->doorbell_qpn; in mlx4_post_send()
475 ++qp->sq.head; in mlx4_post_send()
480 mmio_wc_spinlock(&ctx->bf_lock); in mlx4_post_send()
482 mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl, in mlx4_post_send()
487 ctx->bf_offset ^= ctx->bf_buf_size; in mlx4_post_send()
489 pthread_spin_unlock(&ctx->bf_lock); in mlx4_post_send()
491 qp->sq.head += nreq; in mlx4_post_send()
499 mmio_writel((unsigned long)(ctx->uar + MLX4_SEND_DOORBELL), in mlx4_post_send()
500 qp->doorbell_qpn); in mlx4_post_send()
504 stamp_send_wqe(qp, (ind + qp->sq_spare_wqes - 1) & in mlx4_post_send()
505 (qp->sq.wqe_cnt - 1)); in mlx4_post_send()
507 pthread_spin_unlock(&qp->sq.lock); in mlx4_post_send()
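
When IBV_SEND_INLINE is set, mlx4_post_send() copies the payload straight into the WQE (the loop shown around source lines 367-430), so the source buffer needs no registration and can be reused as soon as the call returns; the total length must stay within qp->max_inline_data, which the driver checks at source line 387. A hedged application-side sketch of a small inline, signaled send (qp assumed valid; names illustrative):

#include <infiniband/verbs.h>
#include <stdint.h>

static int post_small_inline_send(struct ibv_qp *qp)
{
	static const char msg[] = "ping";
	struct ibv_sge sge = {
		.addr   = (uint64_t)(uintptr_t)msg,
		.length = sizeof(msg),
		.lkey   = 0,		/* ignored for inline data */
	};
	struct ibv_send_wr wr = {}, *bad_wr;

	wr.wr_id      = 3;
	wr.opcode     = IBV_WR_SEND;
	wr.sg_list    = &sge;
	wr.num_sge    = 1;
	/* INLINE: provider copies the bytes into the WQE itself;
	 * SIGNALED: request a completion so the SQ can be reaped. */
	wr.send_flags = IBV_SEND_INLINE | IBV_SEND_SIGNALED;

	return ibv_post_send(qp, &wr, &bad_wr);
}
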
522 pthread_spin_lock(&qp->rq.lock); in mlx4_post_recv()
526 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); in mlx4_post_recv()
528 for (nreq = 0; wr; ++nreq, wr = wr->next) { in mlx4_post_recv()
529 if (wq_overflow(&qp->rq, nreq, to_mcq(ibqp->recv_cq))) { in mlx4_post_recv()
535 if (wr->num_sge > qp->rq.max_gs) { in mlx4_post_recv()
543 for (i = 0; i < wr->num_sge; ++i) in mlx4_post_recv()
544 __set_data_seg(scat + i, wr->sg_list + i); in mlx4_post_recv()
546 if (i < qp->rq.max_gs) { in mlx4_post_recv()
552 qp->rq.wrid[ind] = wr->wr_id; in mlx4_post_recv()
554 ind = (ind + 1) & (qp->rq.wqe_cnt - 1); in mlx4_post_recv()
559 qp->rq.head += nreq; in mlx4_post_recv()
567 *qp->db = htobe32(qp->rq.head & 0xffff); in mlx4_post_recv()
570 pthread_spin_unlock(&qp->rq.lock); in mlx4_post_recv()
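
mlx4_post_recv() is driven by the matching receive-side verb; each receive WR carries only a wr_id and a scatter list. A hedged application-side sketch (qp and mr assumed valid, with the buffer registered for IBV_ACCESS_LOCAL_WRITE):

#include <infiniband/verbs.h>
#include <stdint.h>

static int post_one_recv(struct ibv_qp *qp, struct ibv_mr *mr,
			 void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uint64_t)(uintptr_t)buf,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_recv_wr wr = {}, *bad_wr;

	wr.wr_id   = (uint64_t)(uintptr_t)buf;	/* identifies the buffer in the CQE */
	wr.sg_list = &sge;
	wr.num_sge = 1;

	return ibv_post_recv(qp, &wr, &bad_wr);
}
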
595 return (data + MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg) - 1) / in num_inline_segs()
596 (MLX4_INLINE_ALIGN - sizeof (struct mlx4_wqe_inline_seg)); in num_inline_segs()
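
The division above counts how many inline segments the payload will be split into, since each 64-byte chunk loses the size of one inline-segment header. A worked example, assuming MLX4_INLINE_ALIGN is 64 and struct mlx4_wqe_inline_seg is a single 4-byte byte_count:

/* Each chunk then carries at most 64 - 4 = 60 payload bytes, so
 * 200 bytes of inline data need
 *     (200 + 64 - 4 - 1) / (64 - 4) = 259 / 60 = 4 segments,
 * i.e. four inline headers interleaved with the copied payload. */
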
605 max_sq_sge = align(cap->max_inline_data + in mlx4_calc_sq_wqe_size()
606 num_inline_segs(cap->max_inline_data, type) * in mlx4_calc_sq_wqe_size()
610 if (max_sq_sge < cap->max_send_sge) in mlx4_calc_sq_wqe_size()
611 max_sq_sge = cap->max_send_sge; in mlx4_calc_sq_wqe_size()
648 for (qp->sq.wqe_shift = 6; 1 << qp->sq.wqe_shift < size; in mlx4_calc_sq_wqe_size()
649 qp->sq.wqe_shift++) in mlx4_calc_sq_wqe_size()
656 qp->rq.max_gs = cap->max_recv_sge; in mlx4_alloc_qp_buf()
658 if (qp->sq.wqe_cnt) { in mlx4_alloc_qp_buf()
659 qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof (uint64_t)); in mlx4_alloc_qp_buf()
660 if (!qp->sq.wrid) in mlx4_alloc_qp_buf()
661 return -1; in mlx4_alloc_qp_buf()
664 if (qp->rq.wqe_cnt) { in mlx4_alloc_qp_buf()
665 qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof (uint64_t)); in mlx4_alloc_qp_buf()
666 if (!qp->rq.wrid) { in mlx4_alloc_qp_buf()
667 free(qp->sq.wrid); in mlx4_alloc_qp_buf()
668 return -1; in mlx4_alloc_qp_buf()
672 for (qp->rq.wqe_shift = 4; in mlx4_alloc_qp_buf()
673 1 << qp->rq.wqe_shift < qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg); in mlx4_alloc_qp_buf()
674 qp->rq.wqe_shift++) in mlx4_alloc_qp_buf()
677 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + in mlx4_alloc_qp_buf()
678 (qp->sq.wqe_cnt << qp->sq.wqe_shift); in mlx4_alloc_qp_buf()
679 if (qp->rq.wqe_shift > qp->sq.wqe_shift) { in mlx4_alloc_qp_buf()
680 qp->rq.offset = 0; in mlx4_alloc_qp_buf()
681 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; in mlx4_alloc_qp_buf()
683 qp->rq.offset = qp->sq.wqe_cnt << qp->sq.wqe_shift; in mlx4_alloc_qp_buf()
684 qp->sq.offset = 0; in mlx4_alloc_qp_buf()
687 if (qp->buf_size) { in mlx4_alloc_qp_buf()
688 if (mlx4_alloc_buf(&qp->buf, in mlx4_alloc_qp_buf()
689 align(qp->buf_size, to_mdev(context->device)->page_size), in mlx4_alloc_qp_buf()
690 to_mdev(context->device)->page_size)) { in mlx4_alloc_qp_buf()
691 free(qp->sq.wrid); in mlx4_alloc_qp_buf()
692 free(qp->rq.wrid); in mlx4_alloc_qp_buf()
693 return -1; in mlx4_alloc_qp_buf()
696 memset(qp->buf.buf, 0, qp->buf_size); in mlx4_alloc_qp_buf()
698 qp->buf.buf = NULL; in mlx4_alloc_qp_buf()
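
The layout logic above places whichever ring has the larger per-entry stride at offset 0 of the shared buffer, keeping both rings naturally aligned within one allocation. A worked example with hypothetical sizes:

/* Hypothetical QP: rq.wqe_cnt = 256, rq.wqe_shift = 5 (32 B entries);
 * sq.wqe_cnt = 64, sq.wqe_shift = 7 (128 B entries).  The send queue has
 * the larger stride, so sq.offset = 0 and rq.offset = 64 << 7 = 8192;
 * buf_size = 256*32 + 64*128 = 16384 bytes, rounded up to the device
 * page size before mlx4_alloc_buf(). */
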
709 wqe_size = (1 << qp->sq.wqe_shift) - sizeof (struct mlx4_wqe_ctrl_seg); in mlx4_set_sq_sizes()
712 wqe_size -= sizeof (struct mlx4_wqe_datagram_seg); in mlx4_set_sq_sizes()
718 wqe_size -= sizeof (struct mlx4_wqe_raddr_seg); in mlx4_set_sq_sizes()
725 qp->sq.max_gs = wqe_size / sizeof (struct mlx4_wqe_data_seg); in mlx4_set_sq_sizes()
726 cap->max_send_sge = qp->sq.max_gs; in mlx4_set_sq_sizes()
727 qp->sq.max_post = qp->sq.wqe_cnt - qp->sq_spare_wqes; in mlx4_set_sq_sizes()
728 cap->max_send_wr = qp->sq.max_post; in mlx4_set_sq_sizes()
732 * subtract off one segment header for each 64-byte chunk, in mlx4_set_sq_sizes()
734 * 64 for non-UD QPs. in mlx4_set_sq_sizes()
736 qp->max_inline_data = wqe_size - in mlx4_set_sq_sizes()
739 cap->max_inline_data = qp->max_inline_data; in mlx4_set_sq_sizes()
744 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; in mlx4_find_qp()
746 if (ctx->qp_table[tind].refcnt) in mlx4_find_qp()
747 return ctx->qp_table[tind].table[qpn & ctx->qp_table_mask]; in mlx4_find_qp()
754 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; in mlx4_store_qp()
756 if (!ctx->qp_table[tind].refcnt) { in mlx4_store_qp()
757 ctx->qp_table[tind].table = calloc(ctx->qp_table_mask + 1, in mlx4_store_qp()
759 if (!ctx->qp_table[tind].table) in mlx4_store_qp()
760 return -1; in mlx4_store_qp()
763 ++ctx->qp_table[tind].refcnt; in mlx4_store_qp()
764 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = qp; in mlx4_store_qp()
770 int tind = (qpn & (ctx->num_qps - 1)) >> ctx->qp_table_shift; in mlx4_clear_qp()
772 if (!--ctx->qp_table[tind].refcnt) in mlx4_clear_qp()
773 free(ctx->qp_table[tind].table); in mlx4_clear_qp()
775 ctx->qp_table[tind].table[qpn & ctx->qp_table_mask] = NULL; in mlx4_clear_qp()
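
mlx4_find_qp(), mlx4_store_qp() and mlx4_clear_qp() implement a lazily allocated two-level table keyed by QPN, used by the CQ poll path to map a completion back to its QP. A worked example with hypothetical context parameters:

/* Hypothetical context: num_qps = 65536, qp_table_shift = 8,
 * qp_table_mask = 0xff.  For qpn = 0x1234:
 *     tind  = (0x1234 & 0xffff) >> 8 = 0x12   (top-level slot)
 *     entry =  0x1234 & 0xff         = 0x34   (slot within that table)
 * The second-level table for slot 0x12 is calloc'ed by mlx4_store_qp()
 * on first use and freed by mlx4_clear_qp() once its refcnt drops to 0. */
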