/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2023 Marvell.
 */

#include "cnxk_ep_vf.h"
#include "otx_ep_rxtx.h"

static inline uint32_t
cnxk_ep_check_tx_ism_mem(void *tx_queue)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint32_t val;

	/* Batch subtractions from the HW counter to reduce PCIe traffic.
	 * This adds an extra local variable, but almost halves the
	 * number of PCIe writes.
	 */
	val = rte_atomic_load_explicit(iq->inst_cnt_ism, rte_memory_order_relaxed);
	iq->inst_cnt += val - iq->inst_cnt_prev;
	iq->inst_cnt_prev = val;

	if (val > (uint32_t)(1 << 31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, iq->inst_cnt_reg);
		rte_mb();

		rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
		while (rte_atomic_load_explicit(iq->inst_cnt_ism,
						rte_memory_order_relaxed) >= val) {
			rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);
			rte_mb();
		}

		iq->inst_cnt_prev = 0;
	}
	rte_write64(OTX2_SDP_REQUEST_ISM, iq->inst_cnt_reg);

	/* Modulo of the cumulative count with the IQ size gives
	 * the new read index.
	 */
	return iq->inst_cnt & (iq->nb_desc - 1);
}

static inline uint32_t
cnxk_ep_check_tx_pkt_reg(void *tx_queue)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint32_t val;

	val = rte_read32(iq->inst_cnt_reg);
	iq->inst_cnt += val - iq->inst_cnt_prev;
	iq->inst_cnt_prev = val;

	if (val > (uint32_t)(1 << 31)) {
		/* Only subtract the packet count in the HW counter
		 * when the count is above halfway to saturation.
		 */
		rte_write64((uint64_t)val, iq->inst_cnt_reg);
		rte_mb();

		iq->inst_cnt_prev = 0;
	}

	/* Modulo of the cumulative count with the IQ size gives
	 * the new read index.
	 */
	return iq->inst_cnt & (iq->nb_desc - 1);
}
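
/* Illustrative note on the two helpers above: both track completions with a
 * free-running 32-bit count read from hardware, and the delta
 * (val - inst_cnt_prev) is accumulated into inst_cnt. Recovering the read
 * index with a mask assumes nb_desc is a power of two, e.g. with
 * nb_desc = 1024 and inst_cnt = 1030:
 *
 *     read_idx = 1030 & (1024 - 1) = 6, i.e. the same as 1030 % 1024.
 *
 * The (1 << 31) threshold resets the hardware counter once it is more than
 * halfway to wrapping, so the 32-bit delta arithmetic stays correct.
 */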

static inline void
cnxk_ep_flush_iq(struct otx_ep_instr_queue *iq)
{
	const otx_ep_check_pkt_count_t cnxk_tx_pkt_count[2] = { cnxk_ep_check_tx_pkt_reg,
								cnxk_ep_check_tx_ism_mem };

	uint32_t instr_processed = 0;
	uint32_t cnt = 0;

	iq->otx_read_index = cnxk_tx_pkt_count[iq->ism_ena](iq);

	if (unlikely(iq->flush_index == iq->otx_read_index))
		return;

	if (iq->flush_index < iq->otx_read_index) {
		instr_processed = iq->otx_read_index - iq->flush_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);
	} else {
		cnt = iq->nb_desc - iq->flush_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], cnt);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, cnt, iq->nb_desc);

		instr_processed = iq->otx_read_index;
		rte_pktmbuf_free_bulk(&iq->mbuf_list[iq->flush_index], instr_processed);
		iq->flush_index = otx_ep_incr_index(iq->flush_index, instr_processed, iq->nb_desc);

		instr_processed += cnt;
	}

	iq->stats.instr_processed = instr_processed;
	iq->instr_pending -= instr_processed;
}

static inline void
set_sg_size(struct otx_ep_sg_entry *sg_entry, uint16_t size, uint32_t pos)
{
#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
	sg_entry->u.size[pos] = size;
#elif RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
	sg_entry->u.size[(OTX_EP_NUM_SG_PTRS - 1) - pos] = size;
#endif
}

static __rte_always_inline void
cnxk_ep_xmit_pkts_scalar(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq, uint16_t nb_pkts)
{
	struct cnxk_ep_instr_32B *iqcmd;
	struct rte_mbuf *m;
	uint32_t pkt_len;
	uint32_t tx_bytes = 0;
	uint32_t write_idx = iq->host_write_index;
	uint16_t pkts, nb_desc = iq->nb_desc;
	uint8_t desc_size = iq->desc_size;

	for (pkts = 0; pkts < nb_pkts; pkts++) {
		m = tx_pkts[pkts];
		iq->mbuf_list[write_idx] = m;
		pkt_len = rte_pktmbuf_data_len(m);

		iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
		iqcmd->ih.u64 = iq->partial_ih | pkt_len;
		iqcmd->dptr = rte_mbuf_data_iova(m); /* dptr */
		tx_bytes += pkt_len;

		/* Increment the host write index */
		write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
	}
	iq->host_write_index = write_idx;

	/* ring dbell */
	rte_io_wmb();
	rte_write64(pkts, iq->doorbell_reg);
	iq->instr_pending += pkts;
	iq->stats.tx_pkts += pkts;
	iq->stats.tx_bytes += tx_bytes;
}

static __rte_always_inline uint16_t
cnxk_ep_xmit_pkts_scalar_mseg(struct rte_mbuf **tx_pkts, struct otx_ep_instr_queue *iq,
			      uint16_t nb_pkts)
{
	uint16_t frags, num_sg, mask = OTX_EP_NUM_SG_PTRS - 1;
	struct otx_ep_buf_free_info *finfo;
	struct cnxk_ep_instr_32B *iqcmd;
	struct rte_mbuf *m;
	uint32_t pkt_len, tx_bytes = 0;
	uint32_t write_idx = iq->host_write_index;
	uint16_t pkts, nb_desc = iq->nb_desc;
	uint8_t desc_size = iq->desc_size;

	for (pkts = 0; pkts < nb_pkts; pkts++) {
		uint16_t j = 0;

		m = tx_pkts[pkts];
		frags = m->nb_segs;

		pkt_len = rte_pktmbuf_pkt_len(m);
		num_sg = (frags + mask) / OTX_EP_NUM_SG_PTRS;

		if (unlikely(pkt_len > OTX_EP_MAX_PKT_SZ && num_sg > OTX_EP_MAX_SG_LISTS)) {
			otx_ep_err("Failed to xmit the pkt, pkt_len is higher or pkt has more segments");
			goto exit;
		}

		finfo = &iq->req_list[write_idx].finfo;

		iq->mbuf_list[write_idx] = m;
		iqcmd = (struct cnxk_ep_instr_32B *)(iq->base_addr + (write_idx * desc_size));
		iqcmd->dptr = rte_mem_virt2iova(finfo->g.sg);
		iqcmd->ih.u64 = iq->partial_ih | (1ULL << 62) | ((uint64_t)frags << 48) | pkt_len;

		while (frags--) {
			finfo->g.sg[(j >> 2)].ptr[(j & mask)] = rte_mbuf_data_iova(m);
			set_sg_size(&finfo->g.sg[(j >> 2)], m->data_len, (j & mask));
			j++;
			m = m->next;
		}

		/* Increment the host write index */
		write_idx = otx_ep_incr_index(write_idx, 1, nb_desc);
		tx_bytes += pkt_len;
	}
exit:
	iq->host_write_index = write_idx;

	/* ring dbell */
	rte_io_wmb();
	rte_write64(pkts, iq->doorbell_reg);
	iq->instr_pending += pkts;
	iq->stats.tx_pkts += pkts;
	iq->stats.tx_bytes += tx_bytes;

	return pkts;
}
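
/* Illustrative note on the gather path above: fragments are packed four per
 * scatter-gather entry (the j >> 2 indexing assumes OTX_EP_NUM_SG_PTRS is 4),
 * so fragment j lands in finfo->g.sg[j >> 2].ptr[j & 3]; for example the
 * sixth fragment (j = 5) goes to sg[1].ptr[1]. set_sg_size() records the
 * matching length, flipping the index on little-endian hosts, presumably so
 * the 64-bit size word has the same layout regardless of host endianness.
 */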

uint16_t __rte_noinline __rte_hot
cnxk_ep_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint16_t pkts;

	pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);

	cnxk_ep_xmit_pkts_scalar(tx_pkts, iq, pkts);

	if (iq->instr_pending >= OTX_EP_MAX_INSTR)
		cnxk_ep_flush_iq(iq);

	/* Return the number of instructions posted successfully. */
	return pkts;
}

uint16_t __rte_noinline __rte_hot
cnxk_ep_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct otx_ep_instr_queue *iq = (struct otx_ep_instr_queue *)tx_queue;
	uint16_t pkts;

	pkts = RTE_MIN(nb_pkts, iq->nb_desc - iq->instr_pending);

	pkts = cnxk_ep_xmit_pkts_scalar_mseg(tx_pkts, iq, pkts);

	if (iq->instr_pending >= OTX_EP_MAX_INSTR)
		cnxk_ep_flush_iq(iq);

	/* Return the number of instructions posted successfully. */
	return pkts;
}
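
/* Minimal usage sketch (not built by default): applications do not call
 * cnxk_ep_xmit_pkts() or cnxk_ep_xmit_pkts_mseg() directly; the driver
 * installs them as the ethdev Tx burst callbacks, so traffic reaches them
 * through rte_eth_tx_burst(). The guard macro and function name below are
 * illustrative only and assume a port on this PMD that is already
 * configured and started.
 */
#ifdef CNXK_EP_TX_USAGE_SKETCH
#include <rte_ethdev.h>

static uint16_t
cnxk_ep_tx_usage_sketch(uint16_t port_id, uint16_t queue_id,
			struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	/* Dispatches to the Tx burst callback registered for this port,
	 * i.e. one of the xmit functions defined above.
	 */
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
}
#endif /* CNXK_EP_TX_USAGE_SKETCH */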