/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for direct polling
 */
struct mlx5_hw_cq {
	uint64_t cq_addr;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	uint32_t ci;
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for WQE submission.
 * Only the send queue is supported; the receive queue is omitted since it is not used right now
 */
struct mlx5_hw_qp {
	uint64_t dbr_addr;
	uint64_t sq_addr;
	uint64_t sq_bf_addr;
	uint32_t sq_wqe_cnt;
	uint16_t sq_pi;
	uint32_t sq_tx_db_nc;
	uint32_t qp_num;
};

/* qp_num is 24 bits. The 2D lookup table uses the upper and lower 12 bits to find a qp by qp_num */
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << 12)

struct spdk_mlx5_cq {
	struct mlx5_hw_cq hw;
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps[SPDK_MLX5_QP_NUM_LUT_SIZE];
	struct ibv_cq *verbs_cq;
	uint32_t qps_count;
};

struct mlx5_qp_sq_completion {
	uint64_t wr_id;
	/* Number of unsignaled completions (in WQE building blocks) before this one, plus this one.
	 * Used to track qp overflow */
	uint32_t completions;
};

struct spdk_mlx5_qp {
	struct mlx5_hw_qp hw;
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to the SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct spdk_mlx5_cq *cq;
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	uint16_t last_pi;
	uint8_t sigmode;
};

enum {
	/* Default mode, use the flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Enable completion for every WQE control segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Enable completion only for the last WQE control segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};

/**
 * Completion and Event mode (SPDK_MLX5_WQE_CTRL_CE_*).
 * Maps the internal representation of the completion events configuration to PRM values.
 * The second index of g_mlx5_ce_map is (fm_ce_se >> 2) & 0x3.
 */
static uint8_t g_mlx5_ce_map[3][4] = {
	/* SPDK_MLX5_QP_SIG_NONE */
	[0] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_ALL */
	[1] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_LAST */
	[2] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	}
};

static inline uint8_t
mlx5_qp_fm_ce_se_update(struct spdk_mlx5_qp *qp, uint8_t fm_ce_se)
{
	uint8_t ce = (fm_ce_se >> 2) & 0x3;

	assert((ce & (~0x3)) == 0);
	fm_ce_se &= ~SPDK_MLX5_WQE_CTRL_CE_MASK;
	fm_ce_se |= g_mlx5_ce_map[qp->sigmode][ce];

	return fm_ce_se;
}

static inline void *
mlx5_qp_get_wqe_bb(struct mlx5_hw_qp *hw_qp)
{
	return (void *)hw_qp->sq_addr + (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1)) * MLX5_SEND_WQE_BB;
}

static inline void *
mlx5_qp_get_next_wqebb(struct mlx5_hw_qp *hw_qp, uint32_t *to_end, void *cur)
{
	*to_end -= MLX5_SEND_WQE_BB;
	if (*to_end == 0) { /* WQE buffer wrap around */
		*to_end = hw_qp->sq_wqe_cnt * MLX5_SEND_WQE_BB;
		return (void *)(uintptr_t)hw_qp->sq_addr;
	}

	return ((char *)cur) + MLX5_SEND_WQE_BB;
}

static inline void
mlx5_qp_set_comp(struct spdk_mlx5_qp *qp, uint16_t pi,
		 uint64_t wr_id, uint32_t fm_ce_se, uint32_t n_bb)
{
	qp->completions[pi].wr_id = wr_id;
	if ((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) != SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) {
		/* non-signaled WQE, accumulate it in outstanding */
		qp->nonsignaled_outstanding += n_bb;
		qp->completions[pi].completions = 0;
		return;
	}

	/* Store the number of previous nonsignaled WQEs */
	qp->completions[pi].completions = qp->nonsignaled_outstanding + n_bb;
	qp->nonsignaled_outstanding = 0;
}

#if defined(__aarch64__)
#define spdk_memory_bus_store_fence()	asm volatile("dmb oshst" ::: "memory")
#elif defined(__i386__) || defined(__x86_64__)
#define spdk_memory_bus_store_fence()	spdk_wmb()
#endif

static inline void
mlx5_update_tx_db(struct spdk_mlx5_qp *qp)
{
	/*
	 * Use a CPU barrier to prevent code reordering
	 */
	spdk_smp_wmb();

	((uint32_t *)qp->hw.dbr_addr)[MLX5_SND_DBR] = htobe32(qp->hw.sq_pi);
}

static inline void
mlx5_flush_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	*(uint64_t *)(qp->hw.sq_bf_addr) = *(uint64_t *)ctrl;
}

static inline void
mlx5_ring_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	/* 8.9.3.1 Posting a Work Request to Work Queue
	 * 1. Write WQE to the WQE buffer sequentially to previously-posted
	 *    WQE (on WQEBB granularity)
	 *
	 * 2. Update Doorbell Record associated with that queue by writing
	 *    the sq_wqebb_counter or wqe_counter for send and RQ respectively
	 */
	mlx5_update_tx_db(qp);

	/* Make sure that the doorbell record is written before ringing the doorbell */
	spdk_memory_bus_store_fence();

	/* 3. For send request ring DoorBell by writing to the Doorbell
	 *    Register field in the UAR associated with that queue
	 */
	mlx5_flush_tx_db(qp, ctrl);

	/* If the UAR is mapped as WC (write combining) we need another fence to
	 * force the write out. Otherwise it may take a long time.
	 * On BF2/1 the UAR is mapped as NC (non-cached) and the fence is not needed
	 * here.
	 */
#if !defined(__aarch64__)
	if (!qp->hw.sq_tx_db_nc) {
		spdk_memory_bus_store_fence();
	}
#endif
}

#ifdef DEBUG
void mlx5_qp_dump_wqe(struct spdk_mlx5_qp *qp, int n_wqe_bb);
#else
#define mlx5_qp_dump_wqe(...) do { } while (0)
#endif

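/*
 * Illustrative sketch only (not part of the upstream API): mlx5_qp_wqe_submit() below
 * deliberately delays ringing the doorbell and only remembers the last written control
 * segment in qp->ctrl. A flush step along these lines is assumed to run once per batch
 * of submitted WQEs; resetting qp->ctrl is an assumption of this sketch.
 */
static inline void
mlx5_qp_flush_tx_example(struct spdk_mlx5_qp *qp)
{
	if (qp->ctrl != NULL) {
		/* Publish the whole batch with a single doorbell record update and UAR write */
		mlx5_ring_tx_db(qp, qp->ctrl);
		qp->ctrl = NULL;
	}
}
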
static inline void
mlx5_qp_wqe_submit(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, uint16_t n_wqe_bb,
		   uint16_t ctrlr_pi)
{
	mlx5_qp_dump_wqe(qp, n_wqe_bb);

	/* Delay ringing the doorbell */
	qp->hw.sq_pi += n_wqe_bb;
	qp->last_pi = ctrlr_pi;
	qp->ctrl = ctrl;
}

static inline void
mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *ctrl, uint16_t pi,
		  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
		  uint8_t fm_ce_se, uint8_t ds,
		  uint8_t signature, uint32_t imm)
{
	/* Zero the third dword (signature, reserved bytes, fm_ce_se) so the reserved bytes
	 * are not left stale; mlx5dv_set_ctrl_seg() fills in signature and fm_ce_se */
	*(uint32_t *)((void *)ctrl + 8) = 0;
	mlx5dv_set_ctrl_seg(ctrl, pi, opcode, opmod, qp_num,
			    fm_ce_se, ds, signature, imm);
}

static inline struct spdk_mlx5_qp *
mlx5_cq_find_qp(struct spdk_mlx5_cq *cq, uint32_t qp_num)
{
	uint32_t qpn_upper = qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_mask = qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (spdk_unlikely(!cq->qps[qpn_upper].count)) {
		return NULL;
	}
	return cq->qps[qpn_upper].table[qpn_mask];
}

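/*
 * Illustrative sketch only (not part of the upstream API): how the helpers above are
 * expected to be combined to post a single-WQEBB NOP WQE. MLX5_OPCODE_NOP comes from
 * mlx5dv.h; a NOP WQE consists of the control segment only, i.e. one 16-byte data
 * segment (ds == 1) and one WQE building block. Error handling is kept minimal and the
 * doorbell ring is deferred to a flush step (see mlx5_ring_tx_db() and the flush sketch
 * above).
 */
static inline int
mlx5_qp_post_nop_example(struct spdk_mlx5_qp *qp, uint64_t wr_id)
{
	struct mlx5_hw_qp *hw = &qp->hw;
	struct mlx5_wqe_ctrl_seg *ctrl;
	uint16_t pi = hw->sq_pi & (hw->sq_wqe_cnt - 1);
	uint8_t fm_ce_se = mlx5_qp_fm_ce_se_update(qp, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);

	if (spdk_unlikely(qp->tx_available < 1)) {
		/* No free WQE building blocks in the send queue */
		return -ENOMEM;
	}

	ctrl = (struct mlx5_wqe_ctrl_seg *)mlx5_qp_get_wqe_bb(hw);
	mlx5_set_ctrl_seg(ctrl, hw->sq_pi, MLX5_OPCODE_NOP, 0, hw->qp_num, fm_ce_se, 1, 0, 0);

	/* Remember wr_id and whether this WQE generates a CQE, then account for the WQE */
	mlx5_qp_set_comp(qp, pi, wr_id, fm_ce_se, 1);
	mlx5_qp_wqe_submit(qp, ctrl, 1, pi);
	qp->tx_available -= 1;

	return 0;
}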