/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2017 Huawei Technologies Co., Ltd
 */

#include <rte_mbuf.h>
#include <rte_tcp.h>
#include <rte_sctp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#ifdef __ARM64_NEON__
#include <arm_neon.h>
#endif

#include "base/hinic_compat.h"
#include "base/hinic_pmd_hwdev.h"
#include "base/hinic_pmd_hwif.h"
#include "base/hinic_pmd_wq.h"
#include "base/hinic_pmd_nicio.h"
#include "hinic_pmd_ethdev.h"
#include "hinic_pmd_tx.h"

/* packet header and tx offload info */
#define VXLANLEN			8
#define MAX_PLD_OFFSET			221
#define MAX_SINGLE_SGE_SIZE		65536
#define TSO_ENABLE			1
#define TX_MSS_DEFAULT			0x3E00
#define TX_MSS_MIN			0x50

#define HINIC_NONTSO_PKT_MAX_SGE	17	/* non-tso max sge 17 */
#define HINIC_NONTSO_SEG_NUM_INVALID(num)	\
			((num) > HINIC_NONTSO_PKT_MAX_SGE)

#define HINIC_TSO_PKT_MAX_SGE		127	/* tso max sge 127 */
#define HINIC_TSO_SEG_NUM_INVALID(num)	((num) > HINIC_TSO_PKT_MAX_SGE)

/* sizeof(struct hinic_sq_bufdesc) == 16, shift 4 */
#define HINIC_BUF_DESC_SIZE(nr_descs)	(SIZE_8BYTES(((u32)nr_descs) << 4))

#define MASKED_SQ_IDX(sq, idx)		((idx) & (sq)->wq->mask)

/* SQ_CTRL */
#define SQ_CTRL_BUFDESC_SECT_LEN_SHIFT		0
#define SQ_CTRL_TASKSECT_LEN_SHIFT		16
#define SQ_CTRL_DATA_FORMAT_SHIFT		22
#define SQ_CTRL_LEN_SHIFT			29
#define SQ_CTRL_OWNER_SHIFT			31

#define SQ_CTRL_BUFDESC_SECT_LEN_MASK		0xFFU
#define SQ_CTRL_TASKSECT_LEN_MASK		0x1FU
#define SQ_CTRL_DATA_FORMAT_MASK		0x1U
#define SQ_CTRL_LEN_MASK			0x3U
#define SQ_CTRL_OWNER_MASK			0x1U

#define SQ_CTRL_SET(val, member)	\
	(((val) & SQ_CTRL_##member##_MASK) << SQ_CTRL_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT		2
#define SQ_CTRL_QUEUE_INFO_UFO_SHIFT		10
#define SQ_CTRL_QUEUE_INFO_TSO_SHIFT		11
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT	12
#define SQ_CTRL_QUEUE_INFO_MSS_SHIFT		13
#define SQ_CTRL_QUEUE_INFO_SCTP_SHIFT		27
#define SQ_CTRL_QUEUE_INFO_UC_SHIFT		28
#define SQ_CTRL_QUEUE_INFO_PRI_SHIFT		29

#define SQ_CTRL_QUEUE_INFO_PLDOFF_MASK		0xFFU
#define SQ_CTRL_QUEUE_INFO_UFO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TSO_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	0x1U
#define SQ_CTRL_QUEUE_INFO_MSS_MASK		0x3FFFU
#define SQ_CTRL_QUEUE_INFO_SCTP_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_UC_MASK		0x1U
#define SQ_CTRL_QUEUE_INFO_PRI_MASK		0x7U

#define SQ_CTRL_QUEUE_INFO_SET(val, member)	\
	(((u32)(val) & SQ_CTRL_QUEUE_INFO_##member##_MASK) <<	\
	SQ_CTRL_QUEUE_INFO_##member##_SHIFT)

#define SQ_CTRL_QUEUE_INFO_GET(val, member)	\
	(((val) >> SQ_CTRL_QUEUE_INFO_##member##_SHIFT) &	\
	SQ_CTRL_QUEUE_INFO_##member##_MASK)

#define SQ_CTRL_QUEUE_INFO_CLEAR(val, member)	\
	((val) & (~(SQ_CTRL_QUEUE_INFO_##member##_MASK <<	\
	SQ_CTRL_QUEUE_INFO_##member##_SHIFT)))

#define SQ_TASK_INFO0_L2HDR_LEN_SHIFT		0
#define SQ_TASK_INFO0_L4OFFLOAD_SHIFT		8
#define SQ_TASK_INFO0_INNER_L3TYPE_SHIFT	10
#define SQ_TASK_INFO0_VLAN_OFFLOAD_SHIFT	12
#define SQ_TASK_INFO0_PARSE_FLAG_SHIFT		13
#define SQ_TASK_INFO0_UFO_AVD_SHIFT		14
#define SQ_TASK_INFO0_TSO_UFO_SHIFT		15
#define SQ_TASK_INFO0_VLAN_TAG_SHIFT		16

#define SQ_TASK_INFO0_L2HDR_LEN_MASK		0xFFU
#define SQ_TASK_INFO0_L4OFFLOAD_MASK		0x3U
#define SQ_TASK_INFO0_INNER_L3TYPE_MASK		0x3U
#define SQ_TASK_INFO0_VLAN_OFFLOAD_MASK		0x1U
#define SQ_TASK_INFO0_PARSE_FLAG_MASK		0x1U
#define SQ_TASK_INFO0_UFO_AVD_MASK		0x1U
#define SQ_TASK_INFO0_TSO_UFO_MASK		0x1U
#define SQ_TASK_INFO0_VLAN_TAG_MASK		0xFFFFU

#define SQ_TASK_INFO0_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO0_##member##_MASK) <<	\
	SQ_TASK_INFO0_##member##_SHIFT)

#define SQ_TASK_INFO1_MD_TYPE_SHIFT		8
#define SQ_TASK_INFO1_INNER_L4LEN_SHIFT		16
#define SQ_TASK_INFO1_INNER_L3LEN_SHIFT		24

#define SQ_TASK_INFO1_MD_TYPE_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO1_INNER_L3LEN_MASK		0xFFU

#define SQ_TASK_INFO1_SET(val, member)	\
	(((val) & SQ_TASK_INFO1_##member##_MASK) <<	\
	SQ_TASK_INFO1_##member##_SHIFT)

#define SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT	0
#define SQ_TASK_INFO2_OUTER_L3LEN_SHIFT		8
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT	16
#define SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT	24

#define SQ_TASK_INFO2_TUNNEL_L4LEN_MASK		0xFFU
#define SQ_TASK_INFO2_OUTER_L3LEN_MASK		0xFFU
#define SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK	0x7U
#define SQ_TASK_INFO2_OUTER_L3TYPE_MASK		0x3U

#define SQ_TASK_INFO2_SET(val, member)	\
	(((val) & SQ_TASK_INFO2_##member##_MASK) <<	\
	SQ_TASK_INFO2_##member##_SHIFT)

#define SQ_TASK_INFO4_L2TYPE_SHIFT		31

#define SQ_TASK_INFO4_L2TYPE_MASK		0x1U

#define SQ_TASK_INFO4_SET(val, member)	\
	(((u32)(val) & SQ_TASK_INFO4_##member##_MASK) <<	\
	SQ_TASK_INFO4_##member##_SHIFT)

/* SQ_DB */
#define SQ_DB_OFF			0x00000800
#define SQ_DB_INFO_HI_PI_SHIFT		0
#define SQ_DB_INFO_QID_SHIFT		8
#define SQ_DB_INFO_CFLAG_SHIFT		23
#define SQ_DB_INFO_COS_SHIFT		24
#define SQ_DB_INFO_TYPE_SHIFT		27

#define SQ_DB_INFO_HI_PI_MASK		0xFFU
#define SQ_DB_INFO_QID_MASK		0x3FFU
#define SQ_DB_INFO_CFLAG_MASK		0x1U
#define SQ_DB_INFO_COS_MASK		0x7U
#define SQ_DB_INFO_TYPE_MASK		0x1FU
#define SQ_DB_INFO_SET(val, member)	\
	(((u32)(val) & SQ_DB_INFO_##member##_MASK) <<	\
	SQ_DB_INFO_##member##_SHIFT)

#define SQ_DB				1
#define SQ_CFLAG_DP			0	/* CFLAG_DATA_PATH */

#define SQ_DB_PI_LOW_MASK		0xFF
#define SQ_DB_PI_LOW(pi)		((pi) & SQ_DB_PI_LOW_MASK)
#define SQ_DB_PI_HI_SHIFT		8
#define SQ_DB_PI_HIGH(pi)		((pi) >> SQ_DB_PI_HI_SHIFT)
#define SQ_DB_ADDR(sq, pi)	\
	((u64 *)((u8 __iomem *)((sq)->db_addr) + SQ_DB_OFF) + SQ_DB_PI_LOW(pi))

/* txq wq operations */
#define HINIC_GET_SQ_WQE_MASK(txq)	((txq)->wq->mask)

#define HINIC_GET_SQ_HW_CI(txq)	\
	((be16_to_cpu(*(txq)->cons_idx_addr)) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_GET_SQ_LOCAL_CI(txq)	\
	(((txq)->wq->cons_idx) & HINIC_GET_SQ_WQE_MASK(txq))

#define HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt)	\
	do {						\
		(txq)->wq->cons_idx += wqebb_cnt;	\
		(txq)->wq->delta += wqebb_cnt;		\
	} while (0)

#define HINIC_GET_SQ_FREE_WQEBBS(txq)	((txq)->wq->delta - 1)

#define HINIC_IS_SQ_EMPTY(txq)	(((txq)->wq->delta) == ((txq)->q_depth))

#define BUF_DESC_SIZE_SHIFT		4

#define HINIC_SQ_WQE_SIZE(num_sge)		\
	(sizeof(struct hinic_sq_ctrl) + sizeof(struct hinic_sq_task) +	\
	(unsigned int)((num_sge) << BUF_DESC_SIZE_SHIFT))

#define HINIC_SQ_WQEBB_CNT(num_sge)	\
	(int)(ALIGN(HINIC_SQ_WQE_SIZE((u32)num_sge),	\
	HINIC_SQ_WQEBB_SIZE) >> HINIC_SQ_WQEBB_SHIFT)

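/* Convert nr_wqebb 64B WQEBBs to big-endian 32-bit word order, using
 * SSE/NEON byte shuffles when available and falling back to
 * hinic_cpu_to_be32() otherwise.
 */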
static inline void hinic_sq_wqe_cpu_to_be32(void *data, int nr_wqebb)
{
#if defined(__X86_64_SSE__)
	int i;
	__m128i *wqe_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_wqebb; i++) {
		/* convert 64B wqebb using 4 SSE instructions */
		wqe_line[0] = _mm_shuffle_epi8(wqe_line[0], shuf_mask);
		wqe_line[1] = _mm_shuffle_epi8(wqe_line[1], shuf_mask);
		wqe_line[2] = _mm_shuffle_epi8(wqe_line[2], shuf_mask);
		wqe_line[3] = _mm_shuffle_epi8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#elif defined(__ARM64_NEON__)
	int i;
	uint8x16_t *wqe_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
				      9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_wqebb; i++) {
		wqe_line[0] = vqtbl1q_u8(wqe_line[0], shuf_mask);
		wqe_line[1] = vqtbl1q_u8(wqe_line[1], shuf_mask);
		wqe_line[2] = vqtbl1q_u8(wqe_line[2], shuf_mask);
		wqe_line[3] = vqtbl1q_u8(wqe_line[3], shuf_mask);
		wqe_line += 4;
	}
#else
	hinic_cpu_to_be32(data, nr_wqebb * HINIC_SQ_WQEBB_SIZE);
#endif
}

static inline void hinic_sge_cpu_to_be32(void *data, int nr_sge)
{
#if defined(__X86_64_SSE__)
	int i;
	__m128i *sge_line = (__m128i *)data;
	__m128i shuf_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10,
					11, 4, 5, 6, 7, 0, 1, 2, 3);

	for (i = 0; i < nr_sge; i++) {
		/* convert 16B sge using 1 SSE instruction */
		*sge_line = _mm_shuffle_epi8(*sge_line, shuf_mask);
		sge_line++;
	}
#elif defined(__ARM64_NEON__)
	int i;
	uint8x16_t *sge_line = (uint8x16_t *)data;
	const uint8x16_t shuf_mask = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10,
				      9, 8, 15, 14, 13, 12};

	for (i = 0; i < nr_sge; i++) {
		*sge_line = vqtbl1q_u8(*sge_line, shuf_mask);
		sge_line++;
	}
#else
	hinic_cpu_to_be32(data, nr_sge * sizeof(struct hinic_sq_bufdesc));
#endif
}

void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats)
{
	if (!txq || !stats) {
		PMD_DRV_LOG(ERR, "Txq or stats is NULL");
		return;
	}

	memcpy(stats, &txq->txq_stats, sizeof(txq->txq_stats));
}

void hinic_txq_stats_reset(struct hinic_txq *txq)
{
	struct hinic_txq_stats *txq_stats;

	if (txq == NULL)
		return;

	txq_stats = &txq->txq_stats;
	memset(txq_stats, 0, sizeof(*txq_stats));
}

static inline struct rte_mbuf *hinic_copy_tx_mbuf(struct hinic_nic_dev *nic_dev,
						  struct rte_mbuf *mbuf,
						  u16 sge_cnt)
{
	struct rte_mbuf *dst_mbuf;
	u32 offset = 0;
	u16 i;

	if (unlikely(!nic_dev->cpy_mpool))
		return NULL;

	dst_mbuf = rte_pktmbuf_alloc(nic_dev->cpy_mpool);
	if (unlikely(!dst_mbuf))
		return NULL;

	dst_mbuf->data_off = 0;
	for (i = 0; i < sge_cnt; i++) {
		rte_memcpy((char *)dst_mbuf->buf_addr + offset,
			   (char *)mbuf->buf_addr + mbuf->data_off,
			   mbuf->data_len);
		dst_mbuf->data_len += mbuf->data_len;
		offset += mbuf->data_len;
		mbuf = mbuf->next;
	}

	return dst_mbuf;
}

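/* Fill the wqe buffer descriptors with the IOVA and length of each mbuf
 * segment. Handles the wrap-around case where the wqe spans the end of
 * the sq ring; trailing segments beyond the supported sge count are
 * merged into a single copy mbuf.
 */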
static inline bool hinic_mbuf_dma_map_sge(struct hinic_txq *txq,
					  struct rte_mbuf *mbuf,
					  struct hinic_sq_bufdesc *sges,
					  struct hinic_wqe_info *sqe_info)
{
	dma_addr_t dma_addr;
	u16 i, around_sges;
	u16 nb_segs = sqe_info->sge_cnt - sqe_info->cpy_mbuf_cnt;
	u16 real_nb_segs = mbuf->nb_segs;
	struct hinic_sq_bufdesc *sge_idx = sges;

	if (unlikely(sqe_info->around)) {
		/* part of the wqe is at the sq bottom while the rest
		 * is at the sq head
		 */
		i = 0;
		for (sge_idx = sges; (u64)sge_idx <= txq->sq_bot_sge_addr;
		     sge_idx++) {
			dma_addr = rte_mbuf_data_iova(mbuf);
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			i++;
		}

		around_sges = nb_segs - i;
		sge_idx = (struct hinic_sq_bufdesc *)
				((void *)txq->sq_head_addr);
		for (; i < nb_segs; i++) {
			dma_addr = rte_mbuf_data_iova(mbuf);
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}

		/* convert sges at head to big endian */
		hinic_sge_cpu_to_be32((void *)txq->sq_head_addr, around_sges);
	} else {
		/* wqe is in continuous space */
		for (i = 0; i < nb_segs; i++) {
			dma_addr = rte_mbuf_data_iova(mbuf);
			hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
				      mbuf->data_len);
			mbuf = mbuf->next;
			sge_idx++;
		}
	}

	/* for now: support non-tso with more than 17 sges by copying the
	 * remaining mbuf segs into one buffer
	 */
	if (unlikely(sqe_info->cpy_mbuf_cnt != 0)) {
		/* copy the remaining mbuf segs to one valid buffer,
		 * which costs performance
		 */
		txq->txq_stats.cpy_pkts += 1;
		mbuf = hinic_copy_tx_mbuf(txq->nic_dev, mbuf,
					  real_nb_segs - nb_segs);
		if (unlikely(!mbuf))
			return false;

		txq->tx_info[sqe_info->pi].cpy_mbuf = mbuf;

		/* deal with the last mbuf */
		dma_addr = rte_mbuf_data_iova(mbuf);
		hinic_set_sge((struct hinic_sge *)sge_idx, dma_addr,
			      mbuf->data_len);
		if (unlikely(sqe_info->around))
			hinic_sge_cpu_to_be32((void *)sge_idx, 1);
	}

	return true;
}

static inline void hinic_fill_sq_wqe_header(struct hinic_sq_ctrl *ctrl,
					    u32 queue_info, int nr_descs,
					    u8 owner)
{
	u32 ctrl_size, task_size, bufdesc_size;

	ctrl_size = SIZE_8BYTES(sizeof(struct hinic_sq_ctrl));
	task_size = SIZE_8BYTES(sizeof(struct hinic_sq_task));
	bufdesc_size = HINIC_BUF_DESC_SIZE(nr_descs);

	ctrl->ctrl_fmt = SQ_CTRL_SET(bufdesc_size, BUFDESC_SECT_LEN) |
			SQ_CTRL_SET(task_size, TASKSECT_LEN) |
			SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) |
			SQ_CTRL_SET(ctrl_size, LEN) |
			SQ_CTRL_SET(owner, OWNER);

	ctrl->queue_info = queue_info;
	ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1U, UC);

	if (!SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS)) {
		ctrl->queue_info |=
			SQ_CTRL_QUEUE_INFO_SET(TX_MSS_DEFAULT, MSS);
	} else if (SQ_CTRL_QUEUE_INFO_GET(ctrl->queue_info, MSS) < TX_MSS_MIN) {
		/* mss should not be less than 80 */
		ctrl->queue_info =
			SQ_CTRL_QUEUE_INFO_CLEAR(ctrl->queue_info, MSS);
		ctrl->queue_info |= SQ_CTRL_QUEUE_INFO_SET(TX_MSS_MIN, MSS);
	}
}

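/* Check the sge layout of a TSO mbuf chain: every window of 17
 * consecutive segs must carry at least one MSS of data (headers included
 * for the first window). If a window falls short, the remaining segs are
 * marked to be merged into one copy mbuf, or the packet is rejected when
 * they do not fit in HINIC_COPY_MBUF_SIZE.
 */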
static inline bool hinic_is_tso_sge_valid(struct rte_mbuf *mbuf,
					  struct hinic_tx_offload_info
					  *poff_info,
					  struct hinic_wqe_info *sqe_info)
{
	u32 total_len, limit_len, checked_len, left_len;
	u32 i, first_mss_sges, left_sges;
	struct rte_mbuf *mbuf_head, *mbuf_pre;

	left_sges = mbuf->nb_segs;
	mbuf_head = mbuf;

	/* tso sge number validation */
	if (unlikely(left_sges >= HINIC_NONTSO_PKT_MAX_SGE)) {
		checked_len = 0;
		limit_len = mbuf->tso_segsz + poff_info->payload_offset;
		first_mss_sges = HINIC_NONTSO_PKT_MAX_SGE;

		/* every group of 17 consecutive mbuf segs must be checked */
		while (left_sges >= HINIC_NONTSO_PKT_MAX_SGE) {
			/* total len of the first 16 mbufs must be equal
			 * to or greater than limit_len
			 */
			total_len = 0;
			for (i = 0; i < first_mss_sges; i++) {
				total_len += mbuf->data_len;
				mbuf_pre = mbuf;
				mbuf = mbuf->next;
				if (total_len >= limit_len) {
					limit_len = mbuf_head->tso_segsz;
					break;
				}
			}

			checked_len += total_len;

			/* try to copy if not valid */
			if (unlikely(first_mss_sges == i)) {
				left_sges -= first_mss_sges;
				checked_len -= mbuf_pre->data_len;

				left_len = mbuf_head->pkt_len - checked_len;
				if (left_len > HINIC_COPY_MBUF_SIZE)
					return false;

				sqe_info->sge_cnt = mbuf_head->nb_segs -
							left_sges;
				sqe_info->cpy_mbuf_cnt = 1;

				return true;
			}
			first_mss_sges = (HINIC_NONTSO_PKT_MAX_SGE - 1);

			/* continue with the next 16 mbufs */
			left_sges -= (i + 1);
		} /* end of while */
	}

	sqe_info->sge_cnt = mbuf_head->nb_segs;
	return true;
}

static inline void
hinic_set_l4_csum_info(struct hinic_sq_task *task,
		u32 *queue_info, struct hinic_tx_offload_info *poff_info)
{
	u32 tcp_udp_cs, sctp;
	u16 l2hdr_len;

	sctp = 0;
	if (unlikely(poff_info->inner_l4_type == SCTP_OFFLOAD_ENABLE))
		sctp = 1;

	tcp_udp_cs = poff_info->inner_l4_tcp_udp;

	if (poff_info->tunnel_type == TUNNEL_UDP_NO_CSUM) {
		l2hdr_len = poff_info->outer_l2_len;

		task->pkt_info2 |=
		SQ_TASK_INFO2_SET(poff_info->outer_l3_type, OUTER_L3TYPE) |
		SQ_TASK_INFO2_SET(poff_info->outer_l3_len, OUTER_L3LEN);
		task->pkt_info2 |=
		SQ_TASK_INFO2_SET(poff_info->tunnel_type, TUNNEL_L4TYPE) |
		SQ_TASK_INFO2_SET(poff_info->tunnel_length, TUNNEL_L4LEN);
	} else {
		l2hdr_len = poff_info->inner_l2_len;
	}

	task->pkt_info0 |= SQ_TASK_INFO0_SET(l2hdr_len, L2HDR_LEN);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l3_len, INNER_L3LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info1 |=
		SQ_TASK_INFO1_SET(poff_info->inner_l4_len, INNER_L4LEN);
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l4_type, L4OFFLOAD);
	*queue_info |=
		SQ_CTRL_QUEUE_INFO_SET(poff_info->payload_offset, PLDOFF) |
		SQ_CTRL_QUEUE_INFO_SET(tcp_udp_cs, TCPUDP_CS) |
		SQ_CTRL_QUEUE_INFO_SET(sctp, SCTP);
}

static inline void
hinic_set_tso_info(struct hinic_sq_task *task,
		u32 *queue_info, struct rte_mbuf *mbuf,
		struct hinic_tx_offload_info *poff_info)
{
	hinic_set_l4_csum_info(task, queue_info, poff_info);

	/* wqe for tso */
	task->pkt_info0 |=
		SQ_TASK_INFO0_SET(poff_info->inner_l3_type, INNER_L3TYPE);
	task->pkt_info0 |= SQ_TASK_INFO0_SET(TSO_ENABLE, TSO_UFO);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(TSO_ENABLE, TSO);
	/* qsf was initialized in prepare_sq_wqe */
	*queue_info = SQ_CTRL_QUEUE_INFO_CLEAR(*queue_info, MSS);
	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(mbuf->tso_segsz, MSS);
}

static inline void
hinic_set_vlan_tx_offload(struct hinic_sq_task *task,
			u32 *queue_info, u16 vlan_tag, u16 vlan_pri)
{
	task->pkt_info0 |= SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) |
				SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD);

	*queue_info |= SQ_CTRL_QUEUE_INFO_SET(vlan_pri, PRI);
}

static inline void
hinic_fill_tx_offload_info(struct rte_mbuf *mbuf,
		struct hinic_sq_task *task, u32 *queue_info,
		struct hinic_tx_offload_info *tx_off_info)
{
	u16 vlan_tag;
	uint64_t ol_flags = mbuf->ol_flags;

	/* clear DW0~2 of task section for offload */
	task->pkt_info0 = 0;
	task->pkt_info1 = 0;
	task->pkt_info2 = 0;

	/* Base VLAN */
	if (unlikely(ol_flags & PKT_TX_VLAN_PKT)) {
		vlan_tag = mbuf->vlan_tci;
		hinic_set_vlan_tx_offload(task, queue_info, vlan_tag,
					  vlan_tag >> VLAN_PRIO_SHIFT);
	}

	/* non checksum or tso */
	if (unlikely(!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK)))
		return;

	if ((ol_flags & PKT_TX_TCP_SEG))
		/* set tso info for task and qsf */
		hinic_set_tso_info(task, queue_info, mbuf, tx_off_info);
	else /* just support l4 checksum offload */
		hinic_set_l4_csum_info(task, queue_info, tx_off_info);
}

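/* Reclaim completed tx descriptors up to the hardware consumer index:
 * free the transmitted mbufs (single-seg mbufs are bulk-returned to their
 * mempool) and advance the local consumer index.
 */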
static inline void hinic_xmit_mbuf_cleanup(struct hinic_txq *txq)
{
	struct hinic_tx_info *tx_info;
	struct rte_mbuf *mbuf, *m, *mbuf_free[HINIC_MAX_TX_FREE_BULK];
	int i, nb_free = 0;
	u16 hw_ci, sw_ci, sq_mask;
	int wqebb_cnt = 0;

	hw_ci = HINIC_GET_SQ_HW_CI(txq);
	sw_ci = HINIC_GET_SQ_LOCAL_CI(txq);
	sq_mask = HINIC_GET_SQ_WQE_MASK(txq);

	for (i = 0; i < txq->tx_free_thresh; ++i) {
		tx_info = &txq->tx_info[sw_ci];
		if (hw_ci == sw_ci ||
			(((hw_ci - sw_ci) & sq_mask) < tx_info->wqebb_cnt))
			break;

		sw_ci = (sw_ci + tx_info->wqebb_cnt) & sq_mask;

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		wqebb_cnt += tx_info->wqebb_cnt;
		mbuf = tx_info->mbuf;

		if (likely(mbuf->nb_segs == 1)) {
			m = rte_pktmbuf_prefree_seg(mbuf);
			tx_info->mbuf = NULL;

			if (unlikely(m == NULL))
				continue;

			mbuf_free[nb_free++] = m;
			if (unlikely(m->pool != mbuf_free[0]->pool ||
				nb_free >= HINIC_MAX_TX_FREE_BULK)) {
				rte_mempool_put_bulk(mbuf_free[0]->pool,
					(void **)mbuf_free, (nb_free - 1));
				nb_free = 0;
				mbuf_free[nb_free++] = m;
			}
		} else {
			rte_pktmbuf_free(mbuf);
			tx_info->mbuf = NULL;
		}
	}

	if (nb_free > 0)
		rte_mempool_put_bulk(mbuf_free[0]->pool, (void **)mbuf_free,
				     nb_free);

	HINIC_UPDATE_SQ_LOCAL_CI(txq, wqebb_cnt);
}

static inline struct hinic_sq_wqe *
hinic_get_sq_wqe(struct hinic_txq *txq, int wqebb_cnt,
		 struct hinic_wqe_info *wqe_info)
{
	u32 cur_pi, end_pi;
	u16 remain_wqebbs;
	struct hinic_sq *sq = txq->sq;
	struct hinic_wq *wq = txq->wq;

	/* record current pi */
	cur_pi = MASKED_WQE_IDX(wq, wq->prod_idx);
	end_pi = cur_pi + wqebb_cnt;

	/* update next pi and delta */
	wq->prod_idx += wqebb_cnt;
	wq->delta -= wqebb_cnt;

	/* return current pi and owner */
	wqe_info->pi = cur_pi;
	wqe_info->owner = sq->owner;
	wqe_info->around = 0;
	wqe_info->seq_wqebbs = wqebb_cnt;

	if (unlikely(end_pi >= txq->q_depth)) {
		/* update owner of next prod_idx */
		sq->owner = !sq->owner;

		/* turn around to head */
		if (unlikely(end_pi > txq->q_depth)) {
			wqe_info->around = 1;
			remain_wqebbs = txq->q_depth - cur_pi;
			wqe_info->seq_wqebbs = remain_wqebbs;
		}
	}

	return (struct hinic_sq_wqe *)WQ_WQE_ADDR(wq, cur_pi);
}

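/* Sanity-check the requested tx offloads against the mbuf metadata:
 * only VXLAN tunnels are supported, all headers must reside in the first
 * segment, and the IP/L4/TSO flag combinations must be consistent.
 */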
static inline int
hinic_validate_tx_offload(const struct rte_mbuf *m)
{
	uint64_t ol_flags = m->ol_flags;
	uint64_t inner_l3_offset = m->l2_len;

	/* just support vxlan offload */
	if ((ol_flags & PKT_TX_TUNNEL_MASK) &&
	    !(ol_flags & PKT_TX_TUNNEL_VXLAN))
		return -ENOTSUP;

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
		inner_l3_offset += m->outer_l2_len + m->outer_l3_len;

	/* Headers are fragmented */
	if (rte_pktmbuf_data_len(m) < inner_l3_offset + m->l3_len + m->l4_len)
		return -ENOTSUP;

	/* IP checksum can be counted only for IPv4 packet */
	if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
		return -EINVAL;

	/* IP type not set when required */
	if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) {
		if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
			return -EINVAL;
	}

	/* Check requirements for TSO packet */
	if (ol_flags & PKT_TX_TCP_SEG) {
		if (m->tso_segsz == 0 ||
			((ol_flags & PKT_TX_IPV4) &&
			!(ol_flags & PKT_TX_IP_CKSUM)))
			return -EINVAL;
	}

	/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) &&
		!(ol_flags & PKT_TX_OUTER_IPV4))
		return -EINVAL;

	return 0;
}

static inline uint16_t
hinic_ipv4_phdr_cksum(const struct rte_ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
{
	struct ipv4_psd_header {
		uint32_t src_addr; /* IP address of source host. */
		uint32_t dst_addr; /* IP address of destination host. */
		uint8_t zero;      /* zero. */
		uint8_t proto;     /* L4 protocol type. */
		uint16_t len;      /* L4 length. */
	} psd_hdr;
	uint8_t ihl;

	psd_hdr.src_addr = ipv4_hdr->src_addr;
	psd_hdr.dst_addr = ipv4_hdr->dst_addr;
	psd_hdr.zero = 0;
	psd_hdr.proto = ipv4_hdr->next_proto_id;
	if (ol_flags & PKT_TX_TCP_SEG) {
		psd_hdr.len = 0;
	} else {
		/* ipv4_hdr->version_ihl is a single byte; ihl occupies
		 * the lower 4 bits, in units of 4 bytes
		 */
		ihl = (ipv4_hdr->version_ihl & 0xF) << 2;
		psd_hdr.len =
		rte_cpu_to_be_16(rte_be_to_cpu_16(ipv4_hdr->total_length) -
				 ihl);
	}
	return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr));
}

static inline uint16_t
hinic_ipv6_phdr_cksum(const struct rte_ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
{
	uint32_t sum;
	struct {
		uint32_t len;   /* L4 length. */
		uint32_t proto; /* L4 protocol - top 3 bytes must be zero */
	} psd_hdr;

	psd_hdr.proto = (ipv6_hdr->proto << 24);
	if (ol_flags & PKT_TX_TCP_SEG)
		psd_hdr.len = 0;
	else
		psd_hdr.len = ipv6_hdr->payload_len;

	sum = __rte_raw_cksum(ipv6_hdr->src_addr,
		sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), 0);
	sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum);
	return __rte_raw_cksum_reduce(sum);
}

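/* Translate mbuf offload flags into the hinic tx offload info: record
 * header lengths and l3/l4 types, and pre-write the pseudo-header
 * checksums that the hardware expects in the TCP/UDP checksum fields.
 */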
static inline int
hinic_tx_offload_pkt_prepare(struct rte_mbuf *m,
			     struct hinic_tx_offload_info *off_info)
{
	struct rte_ipv4_hdr *ipv4_hdr;
	struct rte_ipv6_hdr *ipv6_hdr;
	struct rte_tcp_hdr *tcp_hdr;
	struct rte_udp_hdr *udp_hdr;
	struct rte_ether_hdr *eth_hdr;
	struct rte_vlan_hdr *vlan_hdr;
	u16 eth_type = 0;
	uint64_t inner_l3_offset = m->l2_len;
	uint64_t ol_flags = m->ol_flags;

	/* Does packet set any of available offloads */
	if (!(ol_flags & HINIC_TX_CKSUM_OFFLOAD_MASK))
		return 0;

	if (unlikely(hinic_validate_tx_offload(m)))
		return -EINVAL;

	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
			(ol_flags & PKT_TX_OUTER_IPV6) ||
			(ol_flags & PKT_TX_TUNNEL_VXLAN)) {
		inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
		off_info->outer_l2_len = m->outer_l2_len;
		off_info->outer_l3_len = m->outer_l3_len;
		/* just support vxlan tunneling pkt */
		off_info->inner_l2_len = m->l2_len - VXLANLEN -
			sizeof(struct rte_udp_hdr);
		off_info->inner_l3_len = m->l3_len;
		off_info->inner_l4_len = m->l4_len;
		off_info->tunnel_length = m->l2_len;
		off_info->payload_offset = m->outer_l2_len +
				m->outer_l3_len + m->l2_len + m->l3_len;
		off_info->tunnel_type = TUNNEL_UDP_NO_CSUM;
	} else {
		off_info->inner_l2_len = m->l2_len;
		off_info->inner_l3_len = m->l3_len;
		off_info->inner_l4_len = m->l4_len;
		off_info->tunnel_type = NOT_TUNNEL;
		off_info->payload_offset = m->l2_len + m->l3_len;
	}

	if (((ol_flags & PKT_TX_L4_MASK) != PKT_TX_SCTP_CKSUM) &&
	    ((ol_flags & PKT_TX_L4_MASK) != PKT_TX_UDP_CKSUM))
		off_info->payload_offset += m->l4_len;

	/* invalid udp or tcp header */
	if (unlikely(off_info->payload_offset > MAX_PLD_OFFSET))
		return -EINVAL;

	/* Process outer udp pseudo-header checksum */
	if ((ol_flags & PKT_TX_TUNNEL_VXLAN) && ((ol_flags & PKT_TX_TCP_SEG) ||
			(ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
			(ol_flags & PKT_TX_OUTER_IPV6))) {
		off_info->tunnel_type = TUNNEL_UDP_CSUM;

		/* inner_l4_tcp_udp csum must be set to calculate the outer
		 * udp checksum for vxlan packets without inner l3 and l4
		 */
		off_info->inner_l4_tcp_udp = 1;

		eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
		eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);

		if (eth_type == RTE_ETHER_TYPE_VLAN) {
			vlan_hdr = (struct rte_vlan_hdr *)(eth_hdr + 1);
			eth_type = rte_be_to_cpu_16(vlan_hdr->eth_proto);
		}

		if (eth_type == RTE_ETHER_TYPE_IPV4) {
			ipv4_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
						m->outer_l2_len);
			off_info->outer_l3_type = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
			ipv4_hdr->hdr_checksum = 0;

			udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
							m->outer_l3_len);
			udp_hdr->dgram_cksum =
				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
		} else if (eth_type == RTE_ETHER_TYPE_IPV6) {
			off_info->outer_l3_type = IPV6_PKT;
			ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						m->outer_l2_len);

			udp_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
						(m->outer_l2_len +
						m->outer_l3_len));
			udp_hdr->dgram_cksum =
				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
		}
	}

	if (ol_flags & PKT_TX_IPV4)
		off_info->inner_l3_type = (ol_flags & PKT_TX_IP_CKSUM) ?
					  IPV4_PKT_WITH_CHKSUM_OFFLOAD :
					  IPV4_PKT_NO_CHKSUM_OFFLOAD;
	else if (ol_flags & PKT_TX_IPV6)
		off_info->inner_l3_type = IPV6_PKT;

	/* Process the pseudo-header checksum */
	if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
		if (ol_flags & PKT_TX_IPV4) {
			ipv4_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
						inner_l3_offset);

			if (ol_flags & PKT_TX_IP_CKSUM)
				ipv4_hdr->hdr_checksum = 0;

			udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr +
								m->l3_len);
			udp_hdr->dgram_cksum =
				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
		} else {
			ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						inner_l3_offset);

			udp_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_udp_hdr *,
						(inner_l3_offset + m->l3_len));
			udp_hdr->dgram_cksum =
				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
		}

		off_info->inner_l4_type = UDP_OFFLOAD_ENABLE;
		off_info->inner_l4_tcp_udp = 1;
		off_info->inner_l4_len = sizeof(struct rte_udp_hdr);
	} else if (((ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM) ||
			(ol_flags & PKT_TX_TCP_SEG)) {
		if (ol_flags & PKT_TX_IPV4) {
			ipv4_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
						inner_l3_offset);

			if (ol_flags & PKT_TX_IP_CKSUM)
				ipv4_hdr->hdr_checksum = 0;

			/* non-TSO tcp */
			tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr +
								m->l3_len);
			tcp_hdr->cksum =
				hinic_ipv4_phdr_cksum(ipv4_hdr, ol_flags);
		} else {
			ipv6_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
						inner_l3_offset);
			/* non-TSO tcp */
			tcp_hdr =
			rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *,
						(inner_l3_offset + m->l3_len));
			tcp_hdr->cksum =
				hinic_ipv6_phdr_cksum(ipv6_hdr, ol_flags);
		}

		off_info->inner_l4_type = TCP_OFFLOAD_ENABLE;
		off_info->inner_l4_tcp_udp = 1;
	} else if ((ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) {
		off_info->inner_l4_type = SCTP_OFFLOAD_ENABLE;
		off_info->inner_l4_tcp_udp = 0;
		off_info->inner_l4_len = sizeof(struct rte_sctp_hdr);
	}

	return 0;
}

static inline bool hinic_get_sge_txoff_info(struct rte_mbuf *mbuf_pkt,
					    struct hinic_wqe_info *sqe_info,
					    struct hinic_tx_offload_info
					    *off_info)
{
	u16 i, total_len, sge_cnt = mbuf_pkt->nb_segs;
	struct rte_mbuf *mbuf;
	int ret;

	memset(off_info, 0, sizeof(*off_info));

	ret = hinic_tx_offload_pkt_prepare(mbuf_pkt, off_info);
	if (unlikely(ret))
		return false;

	sqe_info->cpy_mbuf_cnt = 0;

	/* non tso mbuf */
	if (likely(!(mbuf_pkt->ol_flags & PKT_TX_TCP_SEG))) {
		if (unlikely(mbuf_pkt->pkt_len > MAX_SINGLE_SGE_SIZE)) {
			/* non tso packet len must be less than 64KB */
			return false;
		} else if (unlikely(HINIC_NONTSO_SEG_NUM_INVALID(sge_cnt))) {
			/* a non tso packet may use at most 17 sges; mbuf
			 * segs beyond that must be copied into one buffer
			 */
			total_len = 0;
			mbuf = mbuf_pkt;
			for (i = 0; i < (HINIC_NONTSO_PKT_MAX_SGE - 1) ; i++) {
				total_len += mbuf->data_len;
				mbuf = mbuf->next;
			}

			/* by default, at most 4KB (HINIC_COPY_MBUF_SIZE)
			 * of mbuf segs can be copied
			 */
			if ((u32)(total_len + (u16)HINIC_COPY_MBUF_SIZE) <
				  mbuf_pkt->pkt_len)
				return false;

			sqe_info->sge_cnt = HINIC_NONTSO_PKT_MAX_SGE;
			sqe_info->cpy_mbuf_cnt = 1;
			return true;
		}

		/* valid non tso mbuf */
		sqe_info->sge_cnt = sge_cnt;
	} else {
		/* tso mbuf */
		if (unlikely(HINIC_TSO_SEG_NUM_INVALID(sge_cnt)))
			/* too many mbuf segs */
			return false;

		/* check tso mbuf segs are valid or not */
		if (unlikely(!hinic_is_tso_sge_valid(mbuf_pkt,
			     off_info, sqe_info)))
			return false;
	}

	return true;
}

static inline void hinic_sq_write_db(struct hinic_sq *sq, int cos)
{
	u16 prod_idx;
	u32 hi_prod_idx;
	struct hinic_sq_db sq_db;

	prod_idx = MASKED_SQ_IDX(sq, sq->wq->prod_idx);
	hi_prod_idx = SQ_DB_PI_HIGH(prod_idx);

	sq_db.db_info = SQ_DB_INFO_SET(hi_prod_idx, HI_PI) |
			SQ_DB_INFO_SET(SQ_DB, TYPE) |
			SQ_DB_INFO_SET(SQ_CFLAG_DP, CFLAG) |
			SQ_DB_INFO_SET(cos, COS) |
			SQ_DB_INFO_SET(sq->q_id, QID);

	/* Data should be written to HW in Big Endian Format */
	sq_db.db_info = cpu_to_be32(sq_db.db_info);

	/* Write all before the doorbell */
	rte_wmb();
	writel(sq_db.db_info, SQ_DB_ADDR(sq, prod_idx));
}

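/* Burst transmit: for each mbuf, parse its offload info, reserve wqebbs,
 * fill the sge, task and control sections in big-endian order, then ring
 * the sq doorbell once for the whole burst.
 */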
u16 hinic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, u16 nb_pkts)
{
	int free_wqebb_cnt, wqe_wqebb_cnt;
	u32 queue_info, tx_bytes = 0;
	u16 nb_tx;
	struct hinic_wqe_info sqe_info;
	struct hinic_tx_offload_info off_info;
	struct rte_mbuf *mbuf_pkt;
	struct hinic_txq *txq = tx_queue;
	struct hinic_tx_info *tx_info;
	struct hinic_sq_wqe *sq_wqe;
	struct hinic_sq_task *task;

	/* reclaim tx mbufs before transmitting new packets */
	if (HINIC_GET_SQ_FREE_WQEBBS(txq) < txq->tx_free_thresh)
		hinic_xmit_mbuf_cleanup(txq);

	/* tx loop routine */
	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
		mbuf_pkt = *tx_pkts++;
		queue_info = 0;

		/* 1. parse sge and tx offload info from mbuf */
		if (unlikely(!hinic_get_sge_txoff_info(mbuf_pkt,
						       &sqe_info, &off_info))) {
			txq->txq_stats.off_errs++;
			break;
		}

		/* 2. try to get enough wqebbs */
		wqe_wqebb_cnt = HINIC_SQ_WQEBB_CNT(sqe_info.sge_cnt);
		free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
		if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
			/* reclaim again */
			hinic_xmit_mbuf_cleanup(txq);
			free_wqebb_cnt = HINIC_GET_SQ_FREE_WQEBBS(txq);
			if (unlikely(wqe_wqebb_cnt > free_wqebb_cnt)) {
				txq->txq_stats.tx_busy += (nb_pkts - nb_tx);
				break;
			}
		}

		/* 3. get the sq tail wqe address from the wqe page;
		 * the sq has enough wqebbs for this packet
		 */
		sq_wqe = hinic_get_sq_wqe(txq, wqe_wqebb_cnt, &sqe_info);

		/* 4. fill sq wqe sge section */
		if (unlikely(!hinic_mbuf_dma_map_sge(txq, mbuf_pkt,
						     sq_wqe->buf_descs,
						     &sqe_info))) {
			hinic_return_sq_wqe(txq->nic_dev->hwdev, txq->q_id,
					    wqe_wqebb_cnt, sqe_info.owner);
			txq->txq_stats.off_errs++;
			break;
		}

		/* 5. fill sq wqe task section and queue info */
		task = &sq_wqe->task;

		/* tx packet offload configure */
		hinic_fill_tx_offload_info(mbuf_pkt, task, &queue_info,
					   &off_info);

		/* 6. record tx info */
		tx_info = &txq->tx_info[sqe_info.pi];
		tx_info->mbuf = mbuf_pkt;
		tx_info->wqebb_cnt = wqe_wqebb_cnt;

		/* 7. fill sq wqe header section */
		hinic_fill_sq_wqe_header(&sq_wqe->ctrl, queue_info,
					 sqe_info.sge_cnt, sqe_info.owner);

		/* 8. convert continuous or bottom wqe byte order to
		 * big endian
		 */
		hinic_sq_wqe_cpu_to_be32(sq_wqe, sqe_info.seq_wqebbs);

		tx_bytes += mbuf_pkt->pkt_len;
	}

	/* 9. write sq doorbell in burst mode */
	if (nb_tx) {
		hinic_sq_write_db(txq->sq, txq->cos);

		txq->txq_stats.packets += nb_tx;
		txq->txq_stats.bytes += tx_bytes;
	}
	txq->txq_stats.burst_pkts = nb_tx;

	return nb_tx;
}

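/* Release every mbuf still held by the sq, advancing the local consumer
 * index for each reclaimed wqe; used when the tx queue is stopped.
 */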
void hinic_free_all_tx_skbs(struct hinic_txq *txq)
{
	u16 ci;
	struct hinic_nic_dev *nic_dev = txq->nic_dev;
	struct hinic_tx_info *tx_info;
	int free_wqebbs = hinic_get_sq_free_wqebbs(nic_dev->hwdev,
						   txq->q_id) + 1;

	while (free_wqebbs < txq->q_depth) {
		ci = hinic_get_sq_local_ci(nic_dev->hwdev, txq->q_id);

		tx_info = &txq->tx_info[ci];

		if (unlikely(tx_info->cpy_mbuf != NULL)) {
			rte_pktmbuf_free(tx_info->cpy_mbuf);
			tx_info->cpy_mbuf = NULL;
		}

		rte_pktmbuf_free(tx_info->mbuf);
		hinic_update_sq_local_ci(nic_dev->hwdev, txq->q_id,
					 tx_info->wqebb_cnt);

		free_wqebbs += tx_info->wqebb_cnt;
		tx_info->mbuf = NULL;
	}
}

void hinic_free_all_tx_resources(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++) {
		eth_dev->data->tx_queues[q_id] = NULL;

		if (nic_dev->txqs[q_id] == NULL)
			continue;

		/* stop tx queue and free tx mbufs */
		hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
		hinic_free_tx_resources(nic_dev->txqs[q_id]);

		/* free txq */
		kfree(nic_dev->txqs[q_id]);
		nic_dev->txqs[q_id] = NULL;
	}
}

void hinic_free_all_tx_mbuf(struct rte_eth_dev *eth_dev)
{
	u16 q_id;
	struct hinic_nic_dev *nic_dev =
				HINIC_ETH_DEV_TO_PRIVATE_NIC_DEV(eth_dev);

	for (q_id = 0; q_id < nic_dev->num_sq; q_id++)
		/* stop tx queue and free tx mbufs */
		hinic_free_all_tx_skbs(nic_dev->txqs[q_id]);
}

int hinic_setup_tx_resources(struct hinic_txq *txq)
{
	u64 tx_info_sz;

	tx_info_sz = txq->q_depth * sizeof(*txq->tx_info);
	txq->tx_info = kzalloc_aligned(tx_info_sz, GFP_KERNEL);
	if (!txq->tx_info)
		return -ENOMEM;

	return HINIC_OK;
}

void hinic_free_tx_resources(struct hinic_txq *txq)
{
	if (txq->tx_info == NULL)
		return;

	kfree(txq->tx_info);
	txq->tx_info = NULL;
}

int hinic_create_sq(struct hinic_hwdev *hwdev, u16 q_id, u16 sq_depth)
{
	int err;
	struct hinic_nic_io *nic_io = hwdev->nic_io;
	struct hinic_qp *qp = &nic_io->qps[q_id];
	struct hinic_sq *sq = &qp->sq;
	void __iomem *db_addr;
	volatile u32 *ci_addr;

	sq->sq_depth = sq_depth;
	nic_io->sq_depth = sq_depth;

	/* alloc wq */
	err = hinic_wq_allocate(nic_io->hwdev, &nic_io->sq_wq[q_id],
				HINIC_SQ_WQEBB_SHIFT, nic_io->sq_depth);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to allocate WQ for SQ");
		return err;
	}

	/* alloc sq doorbell space */
	err = hinic_alloc_db_addr(nic_io->hwdev, &db_addr);
	if (err) {
		PMD_DRV_LOG(ERR, "Failed to init db addr");
		goto alloc_db_err;
	}

	/* clear hardware ci */
	ci_addr = (volatile u32 *)HINIC_CI_VADDR(nic_io->ci_vaddr_base, q_id);
	*ci_addr = 0;

	sq->q_id = q_id;
	sq->wq = &nic_io->sq_wq[q_id];
	sq->owner = 1;
	sq->cons_idx_addr = (volatile u16 *)ci_addr;
	sq->db_addr = db_addr;

	return HINIC_OK;

alloc_db_err:
	hinic_wq_free(nic_io->hwdev, &nic_io->sq_wq[q_id]);

	return err;
}

void hinic_destroy_sq(struct hinic_hwdev *hwdev, u16 q_id)
{
	struct hinic_nic_io *nic_io;
	struct hinic_qp *qp;

	nic_io = hwdev->nic_io;
	qp = &nic_io->qps[q_id];

	if (qp->sq.wq == NULL)
		return;

	hinic_free_db_addr(nic_io->hwdev, qp->sq.db_addr);
	hinic_wq_free(nic_io->hwdev, qp->sq.wq);
	qp->sq.wq = NULL;
}