/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for direct polling
 */
struct mlx5_hw_cq {
	uint64_t cq_addr;
	uint32_t cqe_cnt;
	uint32_t cqe_size;
	uint32_t ci;
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for WQE submission.
 * Only the submission queue is supported; the receive queue is omitted since it is not used right now
 */
struct mlx5_hw_qp {
	uint64_t dbr_addr;
	uint64_t sq_addr;
	uint64_t sq_bf_addr;
	uint32_t sq_wqe_cnt;
	uint16_t sq_pi;
	uint32_t sq_tx_db_nc;
	uint32_t qp_num;
};

/* qp_num is 24 bits. The 2D lookup table uses the upper and lower 12 bits to find a qp by qp_num */
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << 12)

struct spdk_mlx5_cq {
	struct mlx5_hw_cq hw;
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps[SPDK_MLX5_QP_NUM_LUT_SIZE];
	struct ibv_cq *verbs_cq;
	uint32_t qps_count;
};

struct mlx5_qp_sq_completion {
	uint64_t wr_id;
	/* Number of unsignaled completions before this one. Used to track QP overflow */
	uint32_t completions;
};

struct spdk_mlx5_qp {
	struct mlx5_hw_qp hw;
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to the SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	struct spdk_mlx5_cq *cq;
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	uint16_t last_pi;
	uint8_t sigmode;
};
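
/*
 * Illustrative sketch, not the in-tree implementation: how a QP could be
 * registered in the two-level qp_num lookup table of spdk_mlx5_cq declared
 * above (the counterpart of mlx5_cq_find_qp() below). The function name and
 * the lazy allocation policy are assumptions for illustration only.
 */
static inline int
mlx5_cq_attach_qp_sketch(struct spdk_mlx5_cq *cq, struct spdk_mlx5_qp *qp)
{
	uint32_t qpn_upper = qp->hw.qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_lower = qp->hw.qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (!cq->qps[qpn_upper].table) {
		/* Lazily allocate the second level of the lookup table */
		cq->qps[qpn_upper].table = calloc(SPDK_MLX5_QP_NUM_LUT_SIZE,
						  sizeof(struct spdk_mlx5_qp *));
		if (!cq->qps[qpn_upper].table) {
			return -ENOMEM;
		}
	}
	cq->qps[qpn_upper].table[qpn_lower] = qp;
	cq->qps[qpn_upper].count++;
	cq->qps_count++;

	return 0;
}
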
enum {
	/* Default mode, use the flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Request a completion for every WQE control segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Request a completion only for the last WQE control segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};

/**
 * Completion and Event mode (SPDK_MLX5_WQE_CTRL_CE_*).
 * Maps the internal representation of the completion events configuration to PRM values.
 * The second index X in g_mlx5_ce_map[][X] is (fm_ce_se >> 2) & 0x3
 */
static uint8_t g_mlx5_ce_map[3][4] = {
	/* SPDK_MLX5_QP_SIG_NONE */
	[0] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_ALL */
	[1] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	},
	/* SPDK_MLX5_QP_SIG_LAST */
	[2] = {
		[0] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[1] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[2] = SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR,
		[3] = SPDK_MLX5_WQE_CTRL_CE_CQ_ECE
	}
};

struct mlx5_crypto_bsf_seg {
	uint8_t size_type;
	uint8_t enc_order;
	uint8_t rsvd0;
	uint8_t enc_standard;
	__be32 raw_data_size;
	uint8_t crypto_block_size_pointer;
	uint8_t rsvd1[7];
	uint8_t xts_initial_tweak[16];
	__be32 dek_pointer;
	uint8_t rsvd2[4];
	uint8_t keytag[8];
	uint8_t rsvd3[16];
};

struct mlx5_sig_bsf_inl {
	__be16 vld_refresh;
	__be16 dif_apptag;
	__be32 dif_reftag;
	uint8_t sig_type;
	uint8_t rp_inv_seed;
	uint8_t rsvd[3];
	uint8_t dif_inc_ref_guard_check;
	__be16 dif_app_bitmask_check;
};

struct mlx5_sig_bsf_seg {
	struct mlx5_sig_bsf_basic {
		uint8_t bsf_size_sbs;
		uint8_t check_byte_mask;
		union {
			uint8_t copy_byte_mask;
			uint8_t bs_selector;
			uint8_t rsvd_wflags;
		} wire;
		union {
			uint8_t bs_selector;
			uint8_t rsvd_mflags;
		} mem;
		__be32 raw_data_size;
		__be32 w_bfs_psv;
		__be32 m_bfs_psv;
	} basic;
	struct mlx5_sig_bsf_ext {
		__be32 t_init_gen_pro_size;
		__be32 rsvd_epi_size;
		__be32 w_tfs_psv;
		__be32 m_tfs_psv;
	} ext;
	struct mlx5_sig_bsf_inl w_inl;
	struct mlx5_sig_bsf_inl m_inl;
};

struct mlx5_wqe_set_psv_seg {
	__be32 psv_index;
	__be16 syndrome;
	uint8_t reserved[2];
	__be64 transient_signature;
};

static inline uint8_t
mlx5_qp_fm_ce_se_update(struct spdk_mlx5_qp *qp, uint8_t fm_ce_se)
{
	uint8_t ce = (fm_ce_se >> 2) & 0x3;

	assert((ce & (~0x3)) == 0);
	fm_ce_se &= ~SPDK_MLX5_WQE_CTRL_CE_MASK;
	fm_ce_se |= g_mlx5_ce_map[qp->sigmode][ce];

	return fm_ce_se;
}

static inline void *
mlx5_qp_get_wqe_bb(struct mlx5_hw_qp *hw_qp)
{
	return (void *)hw_qp->sq_addr + (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1)) * MLX5_SEND_WQE_BB;
}

static inline void *
mlx5_qp_get_next_wqebb(struct mlx5_hw_qp *hw_qp, uint32_t *to_end, void *cur)
{
	*to_end -= MLX5_SEND_WQE_BB;
	if (*to_end == 0) { /* WQE buffer wrap around */
		*to_end = hw_qp->sq_wqe_cnt * MLX5_SEND_WQE_BB;
		return (void *)(uintptr_t)hw_qp->sq_addr;
	}

	return ((char *)cur) + MLX5_SEND_WQE_BB;
}
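
/*
 * Illustrative sketch, assuming a contiguous source buffer: how a caller of
 * the two helpers above could copy a WQE that spans several WQEBBs into the
 * SQ while handling the wrap around of the ring. The helper name and the
 * plain memcpy are assumptions for illustration, not the in-tree code.
 */
static inline void
mlx5_qp_copy_wqe_sketch(struct mlx5_hw_qp *hw_qp, const void *wqe, uint16_t n_bb)
{
	/* Bytes left from the current producer position to the end of the SQ buffer */
	uint32_t to_end = (hw_qp->sq_wqe_cnt - (hw_qp->sq_pi & (hw_qp->sq_wqe_cnt - 1))) *
			  MLX5_SEND_WQE_BB;
	void *dst = mlx5_qp_get_wqe_bb(hw_qp);
	uint16_t i;

	for (i = 0; i < n_bb; i++) {
		memcpy(dst, (const uint8_t *)wqe + (size_t)i * MLX5_SEND_WQE_BB, MLX5_SEND_WQE_BB);
		dst = mlx5_qp_get_next_wqebb(hw_qp, &to_end, dst);
	}
}
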
static inline void
mlx5_qp_set_comp(struct spdk_mlx5_qp *qp, uint16_t pi,
		 uint64_t wr_id, uint32_t fm_ce_se, uint32_t n_bb)
{
	qp->completions[pi].wr_id = wr_id;
	if ((fm_ce_se & SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) != SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE) {
		/* Non-signaled WQE, accumulate it in outstanding */
		qp->nonsignaled_outstanding += n_bb;
		qp->completions[pi].completions = 0;
		return;
	}

	/* Store the number of previous non-signaled WQEs */
	qp->completions[pi].completions = qp->nonsignaled_outstanding + n_bb;
	qp->nonsignaled_outstanding = 0;
}

#if defined(__aarch64__)
#define spdk_memory_bus_store_fence() asm volatile("dmb oshst" ::: "memory")
#elif defined(__i386__) || defined(__x86_64__)
#define spdk_memory_bus_store_fence() spdk_wmb()
#endif

static inline void
mlx5_update_tx_db(struct spdk_mlx5_qp *qp)
{
	/*
	 * Use a CPU barrier to prevent code reordering
	 */
	spdk_smp_wmb();

	((uint32_t *)qp->hw.dbr_addr)[MLX5_SND_DBR] = htobe32(qp->hw.sq_pi);
}

static inline void
mlx5_flush_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	*(uint64_t *)(qp->hw.sq_bf_addr) = *(uint64_t *)ctrl;
}

static inline void
mlx5_ring_tx_db(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl)
{
	/* 8.9.3.1 Posting a Work Request to Work Queue
	 * 1. Write WQE to the WQE buffer sequentially to previously-posted
	 *    WQE (on WQEBB granularity)
	 *
	 * 2. Update Doorbell Record associated with that queue by writing
	 *    the sq_wqebb_counter or wqe_counter for send and RQ respectively
	 */
	mlx5_update_tx_db(qp);

	/* Make sure that the doorbell record is written before ringing the doorbell */
	spdk_memory_bus_store_fence();

	/* 3. For send request ring DoorBell by writing to the Doorbell
	 *    Register field in the UAR associated with that queue
	 */
	mlx5_flush_tx_db(qp, ctrl);

	/* If the UAR is mapped as WC (write combining), we need another fence to
	 * force the write out. Otherwise it may take a long time.
	 * On BF1/BF2 the UAR is mapped as NC (non-cached) and the fence is not
	 * needed here.
	 */
#if !defined(__aarch64__)
	if (!qp->hw.sq_tx_db_nc) {
		spdk_memory_bus_store_fence();
	}
#endif
}

#ifdef DEBUG
void mlx5_qp_dump_wqe(struct spdk_mlx5_qp *qp, int n_wqe_bb);
#else
#define mlx5_qp_dump_wqe(...) do { } while (0)
#endif

static inline void
mlx5_qp_wqe_submit(struct spdk_mlx5_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, uint16_t n_wqe_bb,
		   uint16_t ctrlr_pi)
{
	mlx5_qp_dump_wqe(qp, n_wqe_bb);

	/* Delay ringing the doorbell */
	qp->hw.sq_pi += n_wqe_bb;
	qp->last_pi = ctrlr_pi;
	qp->ctrl = ctrl;
}

static inline void
mlx5_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *ctrl, uint16_t pi,
		  uint8_t opcode, uint8_t opmod, uint32_t qp_num,
		  uint8_t fm_ce_se, uint8_t ds,
		  uint8_t signature, uint32_t imm)
{
	/* Zero the dword at offset 8 (signature, reserved bytes, fm_ce_se) first
	 * since mlx5dv_set_ctrl_seg() doesn't write the reserved bytes */
	*(uint32_t *)((void *)ctrl + 8) = 0;
	mlx5dv_set_ctrl_seg(ctrl, pi, opcode, opmod, qp_num,
			    fm_ce_se, ds, signature, imm);
}

static inline struct spdk_mlx5_qp *
mlx5_cq_find_qp(struct spdk_mlx5_cq *cq, uint32_t qp_num)
{
	uint32_t qpn_upper = qp_num >> SPDK_MLX5_QP_NUM_UPPER_SHIFT;
	uint32_t qpn_lower = qp_num & SPDK_MLX5_QP_NUM_LOWER_MASK;

	if (spdk_unlikely(!cq->qps[qpn_upper].count)) {
		return NULL;
	}
	return cq->qps[qpn_upper].table[qpn_lower];
}

static inline int
mlx5_get_pd_id(struct ibv_pd *pd, uint32_t *pd_id)
{
	struct mlx5dv_pd pd_info;
	struct mlx5dv_obj obj;
	int rc;

	if (!pd) {
		return -EINVAL;
	}
	obj.pd.in = pd;
	obj.pd.out = &pd_info;
	rc = mlx5dv_init_obj(&obj, MLX5DV_OBJ_PD);
	if (rc) {
		return rc;
	}
	*pd_id = pd_info.pdn;

	return 0;
}
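
/*
 * Illustrative end-to-end sketch tying the helpers above together: posting a
 * single NOP WQE. The function name, the immediate doorbell ring and the
 * -ENOMEM convention for a full SQ are assumptions for illustration; the
 * real submission paths live in the .c files and may batch doorbells.
 */
static inline int
mlx5_qp_post_nop_sketch(struct spdk_mlx5_qp *qp, uint64_t wr_id, uint8_t fm_ce_se)
{
	struct mlx5_wqe_ctrl_seg *ctrl;
	uint16_t pi = qp->hw.sq_pi & (qp->hw.sq_wqe_cnt - 1);

	if (spdk_unlikely(qp->tx_available < 1)) {
		return -ENOMEM;
	}
	/* Apply the QP's signaling mode to the user-provided flags */
	fm_ce_se = mlx5_qp_fm_ce_se_update(qp, fm_ce_se);
	ctrl = (struct mlx5_wqe_ctrl_seg *)mlx5_qp_get_wqe_bb(&qp->hw);
	/* A NOP WQE is only the 16-byte control segment: ds is 1 and it occupies 1 WQEBB */
	mlx5_set_ctrl_seg(ctrl, pi, MLX5_OPCODE_NOP, 0, qp->hw.qp_num, fm_ce_se, 1, 0, 0);
	mlx5_qp_set_comp(qp, pi, wr_id, fm_ce_se, 1);
	mlx5_qp_wqe_submit(qp, ctrl, 1, pi);
	qp->tx_available--;
	mlx5_ring_tx_db(qp, ctrl);

	return 0;
}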