/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for direct polling
 */
struct mlx5_hw_cq {
	uint64_t cq_addr;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	uint32_t ci;
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for WQE submission.
 * Only the submission queue is supported; the receive queue is omitted since it is not used right now
 */
struct mlx5_hw_qp {
	uint64_t dbr_addr;
	uint64_t sq_addr;
	uint64_t sq_bf_addr;
	uint32_t sq_wqe_cnt;
	uint16_t sq_pi;
	uint32_t sq_tx_db_nc;
	uint32_t qp_num;
};

/* qp_num is 24 bits. The 2D lookup table uses the upper and lower 12 bits to find a qp by qp_num */
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << 12)

struct spdk_mlx5_cq {
	struct mlx5_hw_cq hw;
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps[SPDK_MLX5_QP_NUM_LUT_SIZE];
	struct ibv_cq *verbs_cq;
	uint32_t qps_count;
};

struct mlx5_qp_sq_completion {
	uint64_t wr_id;
	/* Number of unsignaled completions before this one. Used to track qp overflow */
	uint32_t completions;
};

struct spdk_mlx5_qp {
	struct mlx5_hw_qp hw;
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to the SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct spdk_mlx5_cq *cq;
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	uint16_t last_pi;
	uint8_t sigmode;
};

enum {
	/* Default mode, use flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Enable completion for every control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Enable completion only for the last control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};

/**
 * Completion and Event mode (SPDK_MLX5_WQE_CTRL_CE_*).
 * Maps the internal representation of the completion events configuration to PRM values.
 * The second index X in g_mlx5_ce_map[][X] is (fm_ce_se >> 2) & 0x3
 */
static uint8_t g_mlx5_ce_map[3][4] = {
	/* SPDK_MLX5_QP_SIG_NONE */
	[0] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_ALL */
	[1] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_LAST */
	[2] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	}
};

struct mlx5_crypto_bsf_seg {
	uint8_t size_type;
	uint8_t enc_order;
	uint8_t rsvd0;
	uint8_t enc_standard;
	__be32 raw_data_size;
	uint8_t crypto_block_size_pointer;
	uint8_t rsvd1[7];
	uint8_t xts_initial_tweak[16];
	__be32 dek_pointer;
	uint8_t rsvd2[4];
	uint8_t keytag[8];
	uint8_t rsvd3[16];
};

static inline uint8_t
mlx5_qp_fm_ce_se_update(struct spdk_mlx5_qp *qp, uint8_t fm_ce_se)
{
	uint8_t ce = (fm_ce_se >> 2) & 0x3;

	assert((ce & (~0x3)) == 0);
	fm_ce_se &= ~SPDK_MLX5_WQE_CTRL_CE_MASK;
	fm_ce_se |= g_mlx5_ce_map[qp->sigmode][ce];

	return fm_ce_se;
}
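/*
 * Illustrative sketch, not part of the library: how mlx5_qp_fm_ce_se_update()
 * applies the CE map above. The MLX5_QP_EXAMPLES guard is hypothetical; the
 * example functions in this file compile only if it is defined manually.
 */
#ifdef MLX5_QP_EXAMPLES
static void
mlx5_qp_fm_ce_se_example(void)
{
	struct spdk_mlx5_qp qp = { .sigmode = SPDK_MLX5_QP_SIG_ALL };

	/* In SIG_ALL mode even a WQE posted with no completion flags is
	 * rewritten to generate a CQE */
	assert(mlx5_qp_fm_ce_se_update(&qp, 0) == SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);

	/* In SIG_NONE mode the user's completion flags pass through unchanged */
	qp.sigmode = SPDK_MLX5_QP_SIG_NONE;
	assert(mlx5_qp_fm_ce_se_update(&qp, SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) ==
	       SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
}
#endif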
static inline void *
mlx5_qp_get_wqe_bb(struct mlx5_hw_qp *hw_qp)
{
	return (void *)(uintptr_t)hw_qp->sq_addr + (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1)) * MLX5_SEND_WQE_BB;
}

static inline void *
mlx5_qp_get_next_wqebb(struct mlx5_hw_qp *hw_qp, uint32_t *to_end, void *cur)
{
	*to_end -= MLX5_SEND_WQE_BB;
	if (*to_end == 0) { /* wqe buffer wrap around */
		*to_end = hw_qp->sq_wqe_cnt * MLX5_SEND_WQE_BB;
		return (void *)(uintptr_t)hw_qp->sq_addr;
	}

	return ((char *)cur) + MLX5_SEND_WQE_BB;
}

static inline void
mlx5_qp_set_comp(struct spdk_mlx5_qp *qp, uint16_t pi,
		 uint64_t wr_id, uint32_t fm_ce_se, uint32_t n_bb)
{
	qp->completions[pi].wr_id = wr_id;
	if ((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) != SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) {
		/* Non-signaled WQE, accumulate it in outstanding */
		qp->nonsignaled_outstanding += n_bb;
		qp->completions[pi].completions = 0;
		return;
	}

	/* Store the number of previous non-signaled WQEs */
	qp->completions[pi].completions = qp->nonsignaled_outstanding + n_bb;
	qp->nonsignaled_outstanding = 0;
}

#if defined(__aarch64__)
#define spdk_memory_bus_store_fence() asm volatile("dmb oshst" ::: "memory")
#elif defined(__i386__) || defined(__x86_64__)
#define spdk_memory_bus_store_fence() spdk_wmb()
#endif

static inline void
mlx5_update_tx_db(struct spdk_mlx5_qp *qp)
{
	/*
	 * Use a CPU barrier to prevent code reordering
	 */
	spdk_smp_wmb();

	((uint32_t *)qp->hw.dbr_addr)[MLX5_SND_DBR] = htobe32(qp->hw.sq_pi);
}

static inline void
mlx5_flush_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	*(uint64_t *)(qp->hw.sq_bf_addr) = *(uint64_t *)ctrl;
}

static inline void
mlx5_ring_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	/* 8.9.3.1 Posting a Work Request to a Work Queue
	 * 1. Write the WQE to the WQE buffer sequentially to the previously-posted
	 *    WQE (on WQEBB granularity)
	 *
	 * 2. Update the Doorbell Record associated with that queue by writing
	 *    the sq_wqebb_counter or wqe_counter for the SQ and RQ respectively
	 */
	mlx5_update_tx_db(qp);

	/* Make sure that the doorbell record is written before ringing the doorbell */
	spdk_memory_bus_store_fence();

	/* 3. For a send request, ring the DoorBell by writing to the Doorbell
	 *    Register field in the UAR associated with that queue
	 */
	mlx5_flush_tx_db(qp, ctrl);

	/* If the UAR is mapped as WC (write combining), we need another fence to
	 * force the write out. Otherwise it may take a long time.
	 * On BF1/BF2 the UAR is mapped as NC (non-cacheable) and the fence is
	 * not needed here.
	 */
#if !defined(__aarch64__)
	if (!qp->hw.sq_tx_db_nc) {
		spdk_memory_bus_store_fence();
	}
#endif
}

#ifdef DEBUG
void mlx5_qp_dump_wqe(struct spdk_mlx5_qp *qp, int n_wqe_bb);
#else
#define mlx5_qp_dump_wqe(...) do { } while (0)
#endif
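/*
 * Illustrative sketch (hypothetical MLX5_QP_EXAMPLES guard, as above): the SQ
 * is a power-of-two ring of 64-byte basic blocks (WQEBBs), so the producer
 * index is reduced with a mask rather than a modulo. With 256 WQEBBs, a
 * producer index of 260 lands in slot 260 & 255 == 4.
 */
#ifdef MLX5_QP_EXAMPLES
static void
mlx5_qp_wqe_bb_wrap_example(void)
{
	static uint8_t sq_buf[256 * MLX5_SEND_WQE_BB];
	struct mlx5_hw_qp hw = {
		.sq_addr = (uintptr_t)sq_buf,
		.sq_wqe_cnt = 256,
		.sq_pi = 260,
	};

	/* The producer index has wrapped once; the next WQE goes to slot 4 */
	assert(mlx5_qp_get_wqe_bb(&hw) == (void *)(sq_buf + 4 * MLX5_SEND_WQE_BB));
}
#endif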
static inline void
mlx5_qp_wqe_submit(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, uint16_t n_wqe_bb,
		   uint16_t ctrlr_pi)
{
	mlx5_qp_dump_wqe(qp, n_wqe_bb);

	/* Delay ringing the doorbell */
	qp->hw.sq_pi += n_wqe_bb;
	qp->last_pi = ctrlr_pi;
	qp->ctrl = ctrl;
}

static inline void
mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *ctrl, uint16_t pi,
		  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
		  uint8_t fm_ce_se, uint8_t ds,
		  uint8_t signature, uint32_t imm)
{
	/* Zero the third dword (signature, reserved bytes, fm_ce_se) so the
	 * reserved bytes are cleared before mlx5dv_set_ctrl_seg() fills in
	 * the rest of the segment */
	*(uint32_t *)((void *)ctrl + 8) = 0;
	mlx5dv_set_ctrl_seg(ctrl, pi, opcode, opmod, qp_num,
			    fm_ce_se, ds, signature, imm);
}

static inline struct spdk_mlx5_qp *
mlx5_cq_find_qp(struct spdk_mlx5_cq *cq, uint32_t qp_num)
{
	uint32_t qpn_upper = qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_lower = qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (spdk_unlikely(!cq->qps[qpn_upper].count)) {
		return NULL;
	}
	return cq->qps[qpn_upper].table[qpn_lower];
}

static inline int
mlx5_get_pd_id(struct ibv_pd *pd, uint32_t *pd_id)
{
	struct mlx5dv_pd pd_info;
	struct mlx5dv_obj obj;
	int rc;

	if (!pd) {
		return -EINVAL;
	}
	obj.pd.in = pd;
	obj.pd.out = &pd_info;
	rc = mlx5dv_init_obj(&obj, MLX5DV_OBJ_PD);
	if (rc) {
		return rc;
	}
	*pd_id = pd_info.pdn;

	return 0;
}
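/*
 * Illustrative sketch (hypothetical MLX5_QP_EXAMPLES guard, as above): how a
 * 24-bit qp_num is split into the coordinates of the 2D lookup table used by
 * mlx5_cq_find_qp(). qp_num 0x123456 resolves to row 0x123 (upper 12 bits)
 * and column 0x456 (lower 12 bits); an empty row makes the lookup fail fast.
 */
#ifdef MLX5_QP_EXAMPLES
static void
mlx5_cq_qp_lut_example(void)
{
	static struct spdk_mlx5_cq cq;
	uint32_t qp_num = 0x123456;

	assert((qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT) == 0x123);
	assert((qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK) == 0x456);

	/* A CQ with no QPs registered in that row returns NULL */
	assert(mlx5_cq_find_qp(&cq, qp_num) == NULL);
}
#endif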